|
|
|
/**
|
|
|
|
* Implements a simple `Parser` with built-in functions to parse simple
|
|
|
|
* UnrealScript's types and support for saving / restoring parser states.
|
|
|
|
* Copyright 2021 Anton Tarasenko
|
|
|
|
*------------------------------------------------------------------------------
|
|
|
|
* This file is part of Acedia.
|
|
|
|
*
|
|
|
|
* Acedia is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* Acedia is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with Acedia. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
class Parser extends AcediaObject
|
|
|
|
dependson(Text)
|
|
|
|
dependson(UnicodeData);
|
|
|
|
|
|
|
|
// Max value of a byte
|
|
|
|
var public int BYTE_MAX;
|
|
|
|
// Code points of symbols with important meaning in parsing.
|
|
|
|
var public int CODEPOINT_BACKSLASH;
|
|
|
|
var public int CODEPOINT_USMALL;
|
|
|
|
var public int CODEPOINT_ULARGE;
|
|
|
|
|
|
|
|
// Text object with content that this `Parser` is supposed to parse
|
|
|
|
// (only copies of any `Text` passed to us are stored here).
|
|
|
|
var private Text content;
|
|
|
|
// Incremented each time `Parser` is reinitialized with new `content`.
|
|
|
|
// Can be used to make `Parser` object completely independent from
|
|
|
|
// its past after each re-initialization.
|
|
|
|
// This helps to avoid needless reallocations.
|
|
|
|
var private int version;
|
|
|
|
|
|
|
|
// Describes current state of the `Parser`, instance of this struct
|
|
|
|
// can be used to revert parser back to this state.
|
|
|
|
struct ParserState
{
    // Origin of this state: the object that produced it and the value of
    // that object's `version` counter at the time.
    // `IsStateValid()` compares both against the `Parser` the state is
    // applied to, rejecting states from other parsers or from before
    // a re-initialization.
    var private AcediaObject    ownerObject;
    var private int             ownerVersion;
    // `true` if the `Parser` was in a failed state when this state
    // was recorded.
    var private bool            failed;
    // Index (inside the parsed content) of the next symbol to be parsed.
    var private int             pointer;
};
|
|
|
|
var private ParserState currentState;
|
|
|
|
// For convenience `Parser` will store one internal state that designates
|
|
|
|
// a state that's safe to revert to when some parsing attempt goes wrong.
|
|
|
|
// @see `Confirm()`, `R()`
|
|
|
|
var private ParserState confirmedState;
|
|
|
|
|
|
|
|
// Describes rules for translating escaped sequences ("\r", "\n", "\t")
|
|
|
|
// into appropriate code points.
|
|
|
|
var private const array<UnicodeData.CodePointMapping> escapeCharactersMap;
|
|
|
|
|
|
|
|
// Used to store a result of a `ParseSign()` function.
|
|
|
|
enum ParsedSign
{
    // NOTE(review): meanings below are inferred from the value names,
    // `ParseSign()` itself is not visible from here - confirm.
    // Presumably: no sign symbol was present
    SIGN_Missing,
    // Presumably: "+" was parsed
    SIGN_Plus,
    // Presumably: "-" was parsed
    SIGN_Minus
};
|
|
|
|
|
|
|
|
// TODO: add finalizer
|
|
|
|
|
|
|
|
// Common logic for parser initialization.
|
|
|
|
// Uses `source` as is, without copying, so public initialization method
|
|
|
|
// must do it itself.
|
|
|
|
private final function Parser _initialize(Text source)
{
    if (source == none) return self;

    // `content` only ever holds a copy owned by this `Parser` and is never
    // handed out to callers, so the previous copy must be released here or
    // it would leak on every re-initialization.
    if (content != none && content != source) {
        content.FreeSelf();
    }
    content = source;
    // New version invalidates every `ParserState` obtained before this call
    version += 1;
    currentState.ownerObject    = self;
    currentState.ownerVersion   = version;
    currentState.failed         = false;
    currentState.pointer        = 0;
    // Freshly initialized state is the first safe state to revert to
    confirmedState = currentState;
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initializes `Parser` with new data from a `Text`. Never fails.
|
|
|
|
*
|
|
|
|
* Any data from before this call is lost, any checkpoints are invalidated.
|
|
|
|
*
|
|
|
|
* @param source `Text` object `Parser` will need to parse.
|
|
|
|
* If `none` is passed - parser won't be initialized.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser Initialize(Text source)
{
    // Store our own copy, so that later changes to (or deallocation of)
    // the caller's `Text` cannot affect parsing.
    if (source != none) {
        _initialize(source.Copy());
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initializes `Parser` with new data from a plain `string`.
|
|
|
|
*
|
|
|
|
* Can fail if passed `none` as a parameter.
|
|
|
|
*
|
|
|
|
* Any data from before this call is lost, any checkpoints are invalidated.
|
|
|
|
*
|
|
|
|
* @param source String `Parser` will need to parse.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser InitializeS(string source)
{
    local Text wrappedSource;
    // `Text` built from the `string` is already a fresh object,
    // so it is stored as-is, without extra copying
    wrappedSource = _.text.FromString(source);
    return _initialize(wrappedSource);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks if `Parser` is in a failed state.
|
|
|
|
*
|
|
|
|
* Parser enters a failed state whenever any parsing call returns without
|
|
|
|
* completing its job. `Parser` in a failed state will automatically fail
|
|
|
|
* any further parsing attempts until it gets reset via `R()` call.
|
|
|
|
*
|
|
|
|
* @return Returns 'false' if `Parser()` is in a failed state and
|
|
|
|
* `true` otherwise.
|
|
|
|
*/
|
|
|
|
public final function bool Ok()
{
    // "Ok" is simply the absence of the failure flag
    if (currentState.failed) {
        return false;
    }
    return true;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns copy of the current state of this parser.
|
|
|
|
*
|
|
|
|
* As long as caller `Parser` was not reinitialized, returned `ParserState`
|
|
|
|
* structure can be used to revert this `Parser` to it's current condition
|
|
|
|
* by a `RestoreState()` call.
|
|
|
|
*
|
|
|
|
* @see `RestoreState()`
|
|
|
|
* @return Copy of the current state of the caller `Parser`.
|
|
|
|
*/
|
|
|
|
public final function ParserState GetCurrentState()
{
    local ParserState snapshot;
    // Structs are copied on assignment, so the caller gets
    // an independent value
    snapshot = currentState;
    return snapshot;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns copy of (currently) last confirmed state of this parser.
|
|
|
|
*
|
|
|
|
* As long as caller `Parser` was not reinitialized, returned `ParserState`
|
|
|
|
* structure can be used to revert this `Parser` to it's current confirmed
|
|
|
|
* state by a `RestoreState()` call.
|
|
|
|
*
|
|
|
|
* @see `RestoreState()`, `Confirm()`, `R()`
|
|
|
|
* @return Copy of (currently) last confirmed state of this parser.
|
|
|
|
*/
|
|
|
|
public final function ParserState GetConfirmedState()
{
    local ParserState snapshot;
    // Structs are copied on assignment, so the caller gets
    // an independent value
    snapshot = confirmedState;
    return snapshot;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks if given `stateToCheck` is valid for the caller `Parser`, i.e.:
|
|
|
|
* 1. It is a state generated by either `GetCurrentState()` or
|
|
|
|
* `GetConfirmedState()` calls on the caller `Parser`.
|
|
|
|
* 2. Caller `Parser` was not reinitialized since a call
|
|
|
|
* that generated given `stateToCheck`.
|
|
|
|
*
|
|
|
|
* @param stateToCheck `ParserState` to check for validity for
|
|
|
|
* caller `Parser`.
|
|
|
|
* @return `true` if given `stateToCheck` is valid and `false` otherwise.
|
|
|
|
*/
|
|
|
|
public final function bool IsStateValid(ParserState stateToCheck)
{
    // State must originate from this very object and from its current
    // "incarnation" (no re-initialization since the state was taken).
    return (    stateToCheck.ownerObject == self
            &&  stateToCheck.ownerVersion == version);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks if calling `RestoreState()` for passed state will return a `Parser`
|
|
|
|
* in an "Ok" state (not failed), i.e. state is valid and
|
|
|
|
* was generated when `Parser` was in a non-failed state.
|
|
|
|
*
|
|
|
|
* @param stateToCheck `ParserState` to check for corresponding to
|
|
|
|
* `Parser` being in a non-failed state.
|
|
|
|
* By definition must also be valid for the caller `Parser`.
|
|
|
|
* @return `true` if given `stateToCheck` is valid for the caller `Parser` and describes a non-failed state, `false` otherwise.
|
|
|
|
*/
|
|
|
|
public final function bool IsStateOk(ParserState stateToCheck)
{
    // Only a valid state that was recorded while the parser was not failed
    // can put the parser back into an "Ok" condition.
    return (IsStateValid(stateToCheck) && !stateToCheck.failed);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Resets parser to a state, given by `stateToRestore` argument
|
|
|
|
* (so a state `Parser` was in at the moment given `stateToRestore`
|
|
|
|
* was obtained).
|
|
|
|
*
|
|
|
|
* If given `stateToRestore` is from a different `Parser` or
|
|
|
|
* the owner `Parser` was reinitialized after passed state was obtained, -
|
|
|
|
* function will simply put caller `Parser` into a failed state.
|
|
|
|
* Note that caller `Parser` being put in a failed state after this call
|
|
|
|
* doesn't mean that described issues are actually present:
|
|
|
|
* `stateToRestore` can also describe a failed state of the `Parser`.
|
|
|
|
*
|
|
|
|
* @param stateToRestore `ParserState` that this method will attempt
|
|
|
|
* to set for the caller `Parser`.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser RestoreState(ParserState stateToRestore)
{
    if (IsStateValid(stateToRestore)) {
        currentState = stateToRestore;
    }
    else {
        // Foreign or outdated state: refuse it and mark parser as failed,
        // leaving the rest of the current state untouched
        currentState.failed = true;
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Remembers current state of `Parser` in an internal checkpoint variable,
|
|
|
|
* that can later be restored by an `R()` call.
|
|
|
|
*
|
|
|
|
* Can only save non-failed states and will only fail if caller `Parser` is
|
|
|
|
* in a failed state.
|
|
|
|
*
|
|
|
|
* `Confirm()` and `R()` are essentially convenience wrapper functions for
|
|
|
|
* `GetCurrentState()` and `RestoreState()` calls +
|
|
|
|
* state storage variable.
|
|
|
|
*
|
|
|
|
* @return `true` if current state is recorded in `Parser` as confirmed and
|
|
|
|
* `false` otherwise.
|
|
|
|
*/
|
|
|
|
public final function bool Confirm()
{
    // Failed states are never recorded as a checkpoint
    if (currentState.failed) {
        return false;
    }
    confirmedState = currentState;
    return true;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Resets `Parser` to a last state recorded as confirmed by a last successful
|
|
|
|
* `Confirm()` function call. If there weren't any such call -
|
|
|
|
* reverts `Parser` to it's state right after initialization.
|
|
|
|
*
|
|
|
|
* Always resets failed state of a `Parser`. Cannot fail.
|
|
|
|
*
|
|
|
|
* `Confirm()` and `R()` are essentially convenience wrapper functions for
|
|
|
|
* `GetCurrentState()` and `RestoreState()` calls + state storage variable.
|
|
|
|
*
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser R()
{
    local ParserState checkpoint;
    // Checkpoint is either the state stored by the last successful
    // `Confirm()` or the one recorded during initialization
    checkpoint      = confirmedState;
    currentState    = checkpoint;
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Shifts parsing pointer forward.
|
|
|
|
*
|
|
|
|
* Can only shift forward. To revert to a previous state in case of failure use
|
|
|
|
* combination of `GetCurrentState()` and `RestoreState()` functions.
|
|
|
|
*
|
|
|
|
* @param shift How much to shift parsing pointer?
|
|
|
|
* Values of zero and below are discarded and `1` is used instead
|
|
|
|
* (i.e. by default this method shifts pointer by `1` position).
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
protected final function Parser ShiftPointer(optional int shift)
{
    local int shiftedPointer;
    if (content == none) {
        return self;
    }
    // Omitted, zero and negative shifts all mean "one position forward"
    if (shift < 1) {
        shift = 1;
    }
    // Never move past the end of the content
    shiftedPointer = currentState.pointer + shift;
    if (shiftedPointer > content.GetLength()) {
        shiftedPointer = content.GetLength();
    }
    currentState.pointer = shiftedPointer;
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a code point from this `Parser`'s content, relative to next
|
|
|
|
* code point that caller `Parser` must handle.
|
|
|
|
*
|
|
|
|
* @param `shift` If `0` (default value) or negative value is passed -
|
|
|
|
* simply asks for the code point that caller `Parser` must handle.
|
|
|
|
* Otherwise shifts that index `shift` code points, i.e.
|
|
|
|
* `1` to return next code point or `2` to return code point after
|
|
|
|
* the next one.
|
|
|
|
* @return Returns character at a given shift. If `shift` is too small/large
|
|
|
|
* and does not fit `Parser`'s contents, returns invalid character.
|
|
|
|
* `GetCharacter()` with default (`0`) parameter can also return
|
|
|
|
* invalid character if caller `Parser` was not initialized,
|
|
|
|
* it's contents are empty or it has already consumed all input.
|
|
|
|
*/
|
|
|
|
protected final function Text.Character GetCharacter(optional int shift)
{
    local int absoluteAddress;
    if (content == none) return _.text.GetInvalidCharacter();

    // Negative shifts are treated as `0`: this method never looks behind
    // the current parsing position.
    absoluteAddress = currentState.pointer + Max(0, shift);
    // Out-of-bounds request: report it with the same invalid character
    // that is used for an uninitialized `Parser`, instead of a separately
    // hand-built one, so that both cases stay consistent.
    if (absoluteAddress < 0 || absoluteAddress >= content.GetLength()) {
        return _.text.GetInvalidCharacter();
    }
    return content.GetRawCharacter(absoluteAddress);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Forces caller `Parser` to enter a failed state.
|
|
|
|
*
|
|
|
|
* @return Returns the caller `Parser`, to allow for a quick exit from
|
|
|
|
* a parsing function by `return Fail();`.
|
|
|
|
*/
|
|
|
|
public final function Parser Fail()
{
    // Only the flag is raised, parsing pointer is left where it was.
    // Flag stays raised until a non-failed state is restored.
    currentState.failed = (true);
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns amount of code points that have already been parsed,
|
|
|
|
* provided that caller `Parser` is in a correct state.
|
|
|
|
*
|
|
|
|
* @return Returns how many Unicode code points have already been parsed if
|
|
|
|
* caller `Parser` is in correct state;
|
|
|
|
* otherwise return value is undefined.
|
|
|
|
*/
|
|
|
|
public final function int GetParsedLength()
{
    // Pointer equals the amount of already consumed code points;
    // guard against reporting a negative amount
    if (currentState.pointer < 0) {
        return 0;
    }
    return currentState.pointer;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns amount of code points that have not yet been parsed,
|
|
|
|
* provided that caller `Parser` is in a correct state.
|
|
|
|
*
|
|
|
|
* @return Returns how many Unicode code points are still unparsed if
|
|
|
|
* caller `Parser` is in correct state;
|
|
|
|
* otherwise return value is undefined.
|
|
|
|
*/
|
|
|
|
public final function int GetRemainingLength()
{
    local int unparsedAmount;
    // Uninitialized `Parser` has nothing left to parse:
    // `unparsedAmount` keeps its default value of `0`
    if (content != none) {
        unparsedAmount = Max(0, content.GetLength() - currentState.pointer);
    }
    return unparsedAmount;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Checks if caller `Parser` has already parsed all of it's content.
|
|
|
|
* Uninitialized `Parser` has no content and, therefore, parsed it all.
|
|
|
|
*
|
|
|
|
* Should return `true` iff `GetRemainingLength() == 0`.
|
|
|
|
*
|
|
|
|
* @return `true` if caller `Parser` has no more data to parse.
|
|
|
|
*/
|
|
|
|
public final function bool HasFinished()
{
    // No content at all counts as "everything is parsed";
    // `||` short-circuits, so `content` is never accessed when it is `none`
    return (content == none || currentState.pointer >= content.GetLength());
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns still unparsed part of caller `Parser`'s source as `Text`.
|
|
|
|
*
|
|
|
|
* @return Unparsed part of caller `Parser`'s source as `Text`.
|
|
|
|
*/
|
|
|
|
public final function Text GetRemainder()
{
    local int           i, remainingLength;
    local MutableText   result;
    result = _.text.Empty();
    // Nothing in the loop moves the parsing pointer, so the remaining
    // length is computed once instead of on every iteration
    remainingLength = GetRemainingLength();
    for (i = 0; i < remainingLength; i += 1) {
        result.AppendCharacter(GetCharacter(i));
    }
    return result;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns still unparsed part of caller `Parser`'s source as a plain `string`.
|
|
|
|
*
|
|
|
|
* @return Unparsed part of caller `Parser`'s source as a plain `string`.
|
|
|
|
*/
|
|
|
|
public final function string GetRemainderS()
{
    local int       i, remainingLength;
    local string    result;
    local TextAPI   api;
    api = _.text;
    // Nothing in the loop moves the parsing pointer, so the remaining
    // length is computed once instead of on every iteration
    remainingLength = GetRemainingLength();
    for (i = 0; i < remainingLength; i += 1) {
        result $= api.CharacterToString(GetCharacter(i));
    }
    return result;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Auxiliary method for other methods that have to return resulting
|
|
|
|
* `MutableText` instance.
|
|
|
|
*
|
|
|
|
* If passed instance is `none` or not currently allocated - a new one is
|
|
|
|
* created, otherwise existing one is emptied.
|
|
|
|
*
|
|
|
|
* @param result `MutableText` instance to empty/create.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
protected final function Parser ResetResultText(out MutableText result)
{
    // Usable instance was passed: reuse it, dropping its old contents
    if (result != none && result.IsAllocated())
    {
        result.Clear();
        return self;
    }
    // Otherwise provide the caller with a brand new empty instance
    result = _.text.Empty();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Matches any sequence of whitespace symbols, without returning it.
|
|
|
|
* Starts from where previous parsing function finished.
|
|
|
|
*
|
|
|
|
* Can never cause parser to enter failed state.
|
|
|
|
*
|
|
|
|
* What symbols exactly are considered whitespace refer to the description of
|
|
|
|
* `TextAPI.IsWhitespace()` function.
|
|
|
|
*
|
|
|
|
* @param whitespacesAmount Returns how many whitespace symbols
|
|
|
|
* were skipped. Any given value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser Skip(optional out int whitespacesAmount)
{
    local TextAPI api;
    // Passed value must be discarded in every case, including the one where
    // parser has already failed and nothing can be skipped: reset the
    // counter before the early return.
    whitespacesAmount = 0;
    if (!Ok()) return self;

    api = _.text;
    // Cycle will end once we either reach a non-whitespace symbol or
    // there are no more code points to get (`GetCharacter()` then returns
    // an invalid character)
    while (api.IsWhitespace(GetCharacter(whitespacesAmount))) {
        whitespacesAmount += 1;
    }
    // `ShiftPointer()` treats `0` as `1`, so it must not be called when
    // nothing was skipped
    if (whitespacesAmount > 0) {
        ShiftPointer(whitespacesAmount);
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Function that tries to match given data in `Parser`'s content,
|
|
|
|
* starting from where previous parsing function finished.
|
|
|
|
*
|
|
|
|
* Does nothing if caller `Parser` was in failed state.
|
|
|
|
*
|
|
|
|
* @param word `Text` that must be matched to the `Parser`'s
|
|
|
|
* contents, starting from where previous parsing function finished.
|
|
|
|
* @param caseSensitivity Specifies whether `Match()` should try and
|
|
|
|
* ignore the difference in case, where applicable.
|
|
|
|
* By default it does not ignore case difference.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser Match(
    Text                            word,
    optional Text.CaseSensitivity   caseSensitivity)
{
    local int       i;
    local int       wordLength;
    local TextAPI   api;
    if (word == none)   return Fail();
    if (!Ok())          return self;

    wordLength = word.GetLength();
    // Not enough content left to possibly contain `word`
    if (wordLength > GetRemainingLength()) return Fail();

    api = _.text;
    for (i = 0; i < wordLength; i += 1)
    {
        if (!api.AreEqual(  word.GetCharacter(i), GetCharacter(i),
                            caseSensitivity))
        {
            return Fail();
        }
    }
    // `ShiftPointer()` replaces non-positive shifts with `1`, so matching
    // an empty `word` must not call it, or one unrelated character
    // would be consumed.
    if (wordLength > 0) {
        ShiftPointer(wordLength);
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Function that tries to match given `string`, starting from where
|
|
|
|
* previous parsing function finished.
|
|
|
|
*
|
|
|
|
* Does nothing if caller `Parser` was in failed state.
|
|
|
|
*
|
|
|
|
* @param word `string` that must be matched to the `Parser`'s
|
|
|
|
* contents, starting from where previous parsing function finished.
|
|
|
|
* @param caseSensitivity Specifies whether `Match()` should try and
|
|
|
|
* ignore the difference in case, where applicable.
|
|
|
|
* By default it does not ignore case difference.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MatchS(
    string                          word,
    optional Text.CaseSensitivity   caseSensitivity)
{
    local Text wordAsText;
    // Delegate to the `Text` version through a temporary object
    // that is released right after the attempt
    wordAsText = _.text.FromString(word);
    Match(wordAsText, caseSensitivity);
    wordAsText.FreeSelf();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Internal function for parsing unsigned integers in any base from 2 to 36.
|
|
|
|
*
|
|
|
|
* This parsing can fail, putting `Parser` into a failed state.
|
|
|
|
*
|
|
|
|
* @param result If parsing is successful, this value will contain
|
|
|
|
* parsed integer, otherwise value is undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @param base Base, in which integer in question is recorded.
|
|
|
|
* @param numberLength If this parameter is less or equal to zero,
|
|
|
|
* function will stop parsing the moment it can't recognize a character as
|
|
|
|
* belonging to a number in a given base.
|
|
|
|
* It will only fail if it couldn't parse a single character;
|
|
|
|
* If this parameter is set to be positive (`> 0`), function will
|
|
|
|
* attempt to use exactly `numberLength` character for parsing and will
|
|
|
|
* fail if they would not constitute a valid number.
|
|
|
|
* @param consumedCodePoints Amount of code point used (consumed) to parse
|
|
|
|
* this number; undefined, if parsing is unsuccessful.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MUnsignedInteger(
    out int             result,
    optional int        base,
    optional int        numberLength,
    optional out int    consumedCodePoints)
{
    local bool  parsingFixedLength;
    local int   nextPosition;
    // Any passed values are discarded
    result              = 0;
    consumedCodePoints  = 0;
    // Like every other parsing method, do nothing for an already failed
    // parser: without this guard the method would keep consuming input
    // while the parser is in a failed state.
    if (!Ok()) return self;

    numberLength        = Max(0, numberLength);
    parsingFixedLength  = (numberLength != 0);
    // Omitted base means decimal
    if (base == 0) {
        base = 10;
    }
    else if (base < 2 || base > 36) {
        return Fail();
    }
    while (!HasFinished())
    {
        if (parsingFixedLength && consumedCodePoints >= numberLength) break;
        // Negative value is treated as "character is not a digit
        // in the given base" and ends the number
        nextPosition = _.text.CharacterToInt(GetCharacter(), base);
        if (nextPosition < 0) break;

        result = result * base + nextPosition;
        consumedCodePoints += 1;
        ShiftPointer();
    }
    // Fail if fixed-length number came out too short or
    // if not a single digit was parsed
    if (    (parsingFixedLength && consumedCodePoints != numberLength)
        ||  consumedCodePoints < 1)
    {
        return Fail();
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses escaped sequence of the type that is usually used in
|
|
|
|
* string literals: backslash "\"", followed by any character
|
|
|
|
* (called escaped character later) or, in special cases, several characters.
|
|
|
|
* For most characters escaped sequence resolved into
|
|
|
|
* an escaped character's code point.
|
|
|
|
*
|
|
|
|
* Several escaped symbols:
|
|
|
|
* \n, \r, \t, \b, \f, \v
|
|
|
|
* are translated into a different code point corresponding to
|
|
|
|
* a control symbols, normally denoted by these sequences.
|
|
|
|
*
|
|
|
|
* A Unicode code point can also be directly entered with either of the two
|
|
|
|
* commands:
|
|
|
|
* \u0056
|
|
|
|
* \U56
|
|
|
|
* The difference is that `\u` allows you to enter two-byte code point, while
|
|
|
|
* `\U` only allows to define code points that fit into 1 byte,
|
|
|
|
* but is more compact.
|
|
|
|
*
|
|
|
|
* @param denotedCharacter If parsing is successful, parameter will contain
|
|
|
|
* appropriate code point, denoted by a parsed escaped sequence;
|
|
|
|
* If parsing is unsuccessful, value is undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MEscapedSequence(
    out Text.Character denotedCharacter)
{
    local int index;
    local int escapedCodePoint;
    if (!Ok()) {
        return self;
    }
    // Shortest possible sequence is a backslash + one escaped character
    if (GetRemainingLength() < 2) {
        return Fail();
    }
    if (GetCharacter().codePoint != CODEPOINT_BACKSLASH) {
        return Fail();
    }
    // By default sequence resolves into the escaped character itself
    denotedCharacter = GetCharacter(1);
    ShiftPointer(2);
    escapedCodePoint = denotedCharacter.codePoint;
    // Check whether escaped character stands for a special code point
    while (index < escapeCharactersMap.length)
    {
        if (escapeCharactersMap[index].from == escapedCodePoint)
        {
            denotedCharacter.codePoint = escapeCharactersMap[index].to;
            return self;
        }
        index += 1;
    }
    // Check for an explicit code point given as hexadecimal digits:
    // exactly 4 of them after "u" and exactly 2 after "U"
    if (escapedCodePoint == CODEPOINT_USMALL) {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 4);
    }
    else if (escapedCodePoint == CODEPOINT_ULARGE) {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 2);
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Attempts to parse a string literal: a string enclosed in either of
|
|
|
|
* the following quotation marks: ", ', `.
|
|
|
|
*
|
|
|
|
* String literals can contain escaped sequences.
|
|
|
|
* String literals MUST end with closing quotation mark.
|
|
|
|
* @see `MEscapedSequence()`
|
|
|
|
*
|
|
|
|
* @param result If parsing is successful, this `MutableText` will contain
|
|
|
|
* the contents of string literal with resolved escaped sequences;
|
|
|
|
* if parsing has failed, it's value is undefined.
|
|
|
|
* Any passed contents are simply discarded.
|
|
|
|
* If passed `MutableText` equals to `none`, new instance will be
|
|
|
|
* automatically allocated. This will be done regardless of whether
|
|
|
|
* parsing fails.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MStringLiteral(out MutableText result)
{
    local TextAPI           api;
    local Text.Character    currentCharacter;
    local Text.Character    openingQuote;
    local Text.Character    resolvedCharacter;
    // `result` is prepared even if parsing cannot be attempted
    ResetResultText(result);
    if (!Ok()) {
        return self;
    }
    api = _.text;
    // Literal must open with a quotation mark
    openingQuote = GetCharacter();
    if (!api.IsQuotationMark(openingQuote)) {
        return Fail();
    }
    ShiftPointer();
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        // Same quotation mark as the opening one closes the literal
        if (api.AreEqual(currentCharacter, openingQuote))
        {
            ShiftPointer();
            return self;
        }
        // Ordinary character: copy it as-is
        if (!api.IsCodePoint(currentCharacter, CODEPOINT_BACKSLASH))
        {
            result.AppendCharacter(currentCharacter);
            ShiftPointer();
            continue;
        }
        // Backslash MUST start a valid escaped sequence;
        // `MEscapedSequence()` moves the pointer itself
        if (!MEscapedSequence(resolvedCharacter).Ok()) {
            return Fail();
        }
        result.AppendCharacter(resolvedCharacter);
    }
    // Ran out of content before meeting the closing quotation mark
    return Fail();
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Attempts to parse a string literal: a string enclosed in either of
|
|
|
|
* the following quotation marks: ", ', `.
|
|
|
|
*
|
|
|
|
* String literals can contain escaped sequences.
|
|
|
|
* String literals MUST end with closing quotation mark.
|
|
|
|
* @see `MEscapedSequence()`
|
|
|
|
*
|
|
|
|
* @param result If parsing is successful, this `string` will contain the
|
|
|
|
* contents of string literal with resolved escaped sequences;
|
|
|
|
* if parsing has failed, it's value is undefined.
|
|
|
|
* Any passed contents are simply discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MStringLiteralS(out string result)
{
    local MutableText literalContents;
    if (!Ok()) {
        return self;
    }
    // Parse into a temporary `MutableText` and convert it into a `string`
    // only on success
    literalContents = _.text.Empty();
    MStringLiteral(literalContents);
    if (Ok()) {
        result = literalContents.ToPlainString();
    }
    literalContents.FreeSelf();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Matches everything until it finds one of the breaking symbols:
|
|
|
|
* 1. a specified code point (by default `0`);
|
|
|
|
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
|
|
|
|
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
|
|
|
|
* This method cannot fail.
|
|
|
|
*
|
|
|
|
* @param result Any content before one of the break symbols
|
|
|
|
* will be recorded into this `MutableText`. If passed `MutableText` equals
|
|
|
|
* to `none`, new instance will be automatically allocated. This will be
|
|
|
|
* done regardless of whether parsing fails.
|
|
|
|
* @param characterBreak Method will stop parsing upon encountering this
|
|
|
|
* code point (it will not be included in the `result`)
|
|
|
|
* @param whitespacesBreak `true` if you want to also treat any
|
|
|
|
* whitespace character as a break symbol
|
|
|
|
* (@see `TextAPI.IsWhitespace()` for what symbols are
|
|
|
|
* considered whitespaces).
|
|
|
|
* @param quotesBreak `true` if you want to also treat any
|
|
|
|
* quotation mark character as a break symbol
|
|
|
|
* (@see `TextAPI.IsQuotationMark()` for what symbols are
|
|
|
|
* considered quotation marks).
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MUntil(
    out MutableText         result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local Text.Character    currentCharacter;
    local TextAPI           api;
    // `result` is prepared even if parsing cannot be attempted
    ResetResultText(result);
    if (!Ok()) {
        return self;
    }
    api = _.text;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        // Stop (without consuming) at the first breaking symbol
        if (    api.AreEqual(currentCharacter, characterBreak)
            ||  (whitespacesBreak && api.IsWhitespace(currentCharacter))
            ||  (quotesBreak && api.IsQuotationMark(currentCharacter)))
        {
            break;
        }
        result.AppendCharacter(currentCharacter);
        ShiftPointer();
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Matches everything until it finds one of the breaking symbols:
|
|
|
|
* 1. a specified code point (by default `0`);
|
|
|
|
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
|
|
|
|
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
|
|
|
|
* This method cannot fail.
|
|
|
|
*
|
|
|
|
* @param result Any content before one of the break symbols
|
|
|
|
* will be recorded into this `string`.
|
|
|
|
* @param characterBreak Method will stop parsing upon encountering this
|
|
|
|
* code point (it will not be included in the `result`)
|
|
|
|
* @param whitespacesBreak `true` if you want to also treat any
|
|
|
|
* whitespace character as a break symbol
|
|
|
|
* (@see `TextAPI.IsWhitespace()` for what symbols are
|
|
|
|
* considered whitespaces).
|
|
|
|
* @param quotesBreak `true` if you want to also treat any
|
|
|
|
* quotation mark character as a break symbol
|
|
|
|
* (@see `TextAPI.IsQuotationMark()` for what symbols are
|
|
|
|
* considered quotation marks).
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MUntilS(
    out string              result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local MutableText matchedContents;
    if (!Ok()) {
        return self;
    }
    // Collect the match in a temporary `MutableText`, hand it out as
    // a plain `string` and release the temporary object
    matchedContents = _.text.Empty();
    MUntil(matchedContents, characterBreak, whitespacesBreak, quotesBreak);
    result = matchedContents.ToPlainString();
    matchedContents.FreeSelf();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Matches everything until it finds one of the breaking sequences:
|
|
|
|
* 1. Any of the specified `separators`.
|
|
|
|
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
|
|
|
|
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotationMark()`).
|
|
|
|
* This method cannot fail.
|
|
|
|
*
|
|
|
|
* @param result Any content before one of the breaking sequences
|
|
|
|
* will be recorded into this `MutableText`. If passed `MutableText` equals
|
|
|
|
* to `none`, new instance will be automatically allocated. This will be
|
|
|
|
* done regardless of whether parsing fails.
|
|
|
|
* @param separators Method will stop parsing upon encountering any
|
|
|
|
* of these `Text`s (but they will not be included in the `result`).
|
|
|
|
* @param whitespacesBreak `true` if you want to also treat any
|
|
|
|
* whitespace character as a break symbol
|
|
|
|
* (@see `TextAPI.IsWhitespace()` for what symbols are
|
|
|
|
* considered whitespaces).
|
|
|
|
* @param quotesBreak `true` if you want to also treat any
|
|
|
|
* quotation mark character as a break symbol
|
|
|
|
* (@see `TextAPI.IsQuotationMark()` for what symbols are
|
|
|
|
* considered quotation marks).
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MUntilMany(
    out MutableText result,
    array<Text>     separators,
    optional bool   whitespacesBreak,
    optional bool   quotesBreak)
{
    local bool              foundEnd, separatorMatched;
    local int               i, j, pointerShift;
    local int               separatorLength, separatorStart;
    local Text              separator;
    local Text.Character    nextCharacter;
    //  `result` is prepared even for a parser in a failed state
    ResetResultText(result);
    if (!Ok()) return self;

    //  `pointerShift` is an offset from the current parser position;
    //  the parser itself is not moved until we know where to stop.
    while (pointerShift < GetRemainingLength())
    {
        nextCharacter = GetCharacter(pointerShift);
        //  Single-character breaks take priority over `separators`
        if (whitespacesBreak && _.text.IsWhitespace(nextCharacter))    break;
        if (quotesBreak && _.text.IsQuotationMark(nextCharacter))      break;
        //  Check whether any separator *ends* at the current character.
        //  Scanned input is compared against the whole separator instead of
        //  tracking a per-separator progress counter: such a counter loses
        //  matches whose beginning overlaps a previously failed attempt
        //  (e.g. separator "ab" inside "aab").
        for (i = 0; i < separators.length; i += 1)
        {
            separator = separators[i];
            if (separator == none) continue;
            separatorLength = separator.GetLength();
            //  Empty separators can never be matched
            if (separatorLength <= 0)                   continue;
            //  Not enough characters scanned so far to fit this separator
            if (separatorLength > pointerShift + 1)     continue;
            separatorStart = pointerShift + 1 - separatorLength;
            separatorMatched = true;
            //  Compare backwards: the last character is the one that just
            //  changed, so mismatches are usually detected immediately
            for (j = separatorLength - 1; j >= 0; j -= 1)
            {
                if (!_.text.AreEqual(   GetCharacter(separatorStart + j),
                                        separator.GetCharacter(j)))
                {
                    separatorMatched = false;
                    break;
                }
            }
            if (separatorMatched)
            {
                //  Stop right before the separator, so that it is neither
                //  recorded into `result` nor consumed
                foundEnd        = true;
                pointerShift    = separatorStart;
                break;
            }
        }
        if (foundEnd) {
            break;
        }
        pointerShift += 1;
    }
    //  Consume everything before the break and record it into `result`
    for (i = 0; i < pointerShift; i += 1)
    {
        result.AppendCharacter(GetCharacter());
        ShiftPointer();
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Matches everything until it finds one of the breaking sequences:
|
|
|
|
* 1. Any of the specified `separators`.
|
|
|
|
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
|
|
|
|
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
|
|
|
|
* This method cannot fail.
|
|
|
|
*
|
|
|
|
* @param result Any content before one of the breaking sequences
|
|
|
|
* will be recorded into this `MutableText`. If passed `MutableText` equals
|
|
|
|
* to `none`, new instance will be automatically allocated.
|
|
|
|
* @param endWords Method will stop parsing upon encountering any
* of these `Text`s (but they will not be included in the `result`).
|
|
|
|
* @param whitespacesBreak `true` if you want to also treat any
|
|
|
|
* whitespace character as a break symbol
|
|
|
|
* (@see `TextAPI.IsWhitespace()` for what symbols are
|
|
|
|
* considered whitespaces).
|
|
|
|
* @param quotesBreak `true` if you want to also treat any
|
|
|
|
* quotation mark character as a break symbol
|
|
|
|
* (@see `TextAPI.IsQuotation()` for what symbols are
|
|
|
|
* considered quotation marks).
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MUntilManyS(
    out string      result,
    array<Text>     endWords,
    optional bool   whitespacesBreak,
    optional bool   quotesBreak)
{
    local MutableText buffer;
    // Nothing is parsed (and `result` is not changed) by a failed parser
    if (!Ok()) {
        return self;
    }
    // Parse into a temporary `MutableText` and convert it into
    // a plain `string` for the caller
    buffer = _.text.Empty();
    MUntilMany(buffer, endWords, whitespacesBreak, quotesBreak);
    result = buffer.ToPlainString();
    // The temporary text object is not needed past this point
    buffer.FreeSelf();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses a string as either "simple" or "quoted".
|
|
|
|
* Not being able to read any symbols is not considered a failure.
|
|
|
|
*
|
|
|
|
* Reading empty string (either due to lack of further data or
|
|
|
|
* instantly encountering a break symbol) is not considered a failure.
|
|
|
|
*
|
|
|
|
* Quoted string starts with quotation mark and ends either
|
|
|
|
* at the corresponding closing (un-escaped) mark
|
|
|
|
* or when `Parser`'s input has been fully consumed.
|
|
|
|
* If string started with a quotation mark, this method will act exactly
|
|
|
|
* like `MStringLiteral()`.
|
|
|
|
*
|
|
|
|
* @param result If parsing is successful - string's contents will be
|
|
|
|
* recorded here; if parsing has failed - value is undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* If passed `MutableText` equals to `none`, new instance will be
|
|
|
|
* automatically allocated.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MString(out MutableText result)
{
    if (!Ok()) {
        return self;
    }
    // "Simple" string: read until the first whitespace or quotation mark
    if (!_.text.IsQuotationMark(GetCharacter()))
    {
        MUntil(result,, true, true);
        return self;
    }
    // "Quoted" string: opening quotation mark means we parse a literal
    MStringLiteral(result);
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses a string as either "simple" or "quoted".
|
|
|
|
* Not being able to read any symbols is not considered a failure.
|
|
|
|
*
|
|
|
|
* Reading empty string (either due to lack of further data or
|
|
|
|
* instantly encountering a break symbol) is not considered a failure.
|
|
|
|
*
|
|
|
|
* Quoted string starts with quotation mark and ends either
|
|
|
|
* at the corresponding closing (un-escaped) mark
|
|
|
|
* or when `Parser`'s input has been fully consumed.
|
|
|
|
* If string started with a quotation mark, this method will act exactly
|
|
|
|
* like `MStringLiteral()`.
|
|
|
|
*
|
|
|
|
* @param result If parsing is successful - string's contents will be
|
|
|
|
* recorded here; if parsing has failed - value is undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MStringS(out string result)
{
    local MutableText buffer;
    // Failed parser leaves `result` as it was
    if (!Ok()) {
        return self;
    }
    // Reuse `MString()` through a temporary `MutableText` and
    // convert its contents into a plain `string`
    buffer = _.text.Empty();
    MString(buffer);
    result = buffer.ToPlainString();
    // The temporary text object is not needed past this point
    buffer.FreeSelf();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Matches a (possibly empty) sequence of whitespace symbols.
|
|
|
|
*
|
|
|
|
* Cannot fail (not being able to read any input is not considered a failure).
|
|
|
|
*
|
|
|
|
* @param result If parsing was successful - whitespaces will be recorded
|
|
|
|
* into this `MutableText`, otherwise - undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* If passed `MutableText` equals to `none`, new instance will be
|
|
|
|
* automatically allocated.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MWhitespaces(out MutableText result)
{
    local TextAPI           textTools;
    local Text.Character    candidate;
    if (!Ok()) {
        return self;
    }
    textTools = _.text;
    // Previous contents of `result` are discarded;
    // missing `result` is replaced with a fresh empty text
    if (result != none) {
        result.Clear();
    }
    else {
        result = textTools.Empty();
    }
    // Consume characters for as long as they are whitespaces
    while (!HasFinished())
    {
        candidate = GetCharacter();
        if (textTools.IsWhitespace(candidate))
        {
            result.AppendCharacter(candidate);
            ShiftPointer();
        }
        else {
            break;
        }
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Matches a (possibly empty) sequence of whitespace symbols.
|
|
|
|
*
|
|
|
|
* Cannot fail (not being able to read any input is not considered a failure).
|
|
|
|
*
|
|
|
|
* @param result If parsing was successful - whitespaces will be
|
|
|
|
* recorded here, otherwise - undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MWhitespacesS(out string result)
{
    local MutableText buffer;
    // Failed parser leaves `result` as it was
    if (!Ok()) {
        return self;
    }
    // Reuse `MWhitespaces()` through a temporary `MutableText` and
    // convert its contents into a plain `string`
    buffer = _.text.Empty();
    MWhitespaces(buffer);
    result = buffer.ToPlainString();
    // The temporary text object is not needed past this point
    buffer.FreeSelf();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses next code point as itself.
|
|
|
|
*
|
|
|
|
* Can only fail if caller `Parser` has already exhausted all available data.
|
|
|
|
*
|
|
|
|
* @param result If parsing was successful - next Unicode code point,
|
|
|
|
* otherwise - value is undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MCharacter(out Text.Character result)
{
    if (!Ok()) {
        return self;
    }
    // No input left - nothing to return
    if (HasFinished()) {
        return Fail();
    }
    // Report current character and step over it
    result = GetCharacter();
    ShiftPointer();
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses next code point as a byte.
|
|
|
|
* Can fail if caller `Parser` has already exhausted all available data or
|
|
|
|
* next Unicode code point cannot fit into the `byte` value range.
|
|
|
|
*
|
|
|
|
* @param result If parsing was successful - next Unicode code point as
|
|
|
|
* a byte, otherwise - value is undefined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MByte(out byte result)
{
    local bool              fitsIntoByte;
    local Text.Character    parsedCharacter;
    if (!Ok()) {
        return self;
    }
    // We need at least one character to work with
    if (!MCharacter(parsedCharacter).Ok()) {
        return Fail();
    }
    // Only code points inside [0; BYTE_MAX] can be stored in a `byte`
    fitsIntoByte = parsedCharacter.codePoint >= 0
        && parsedCharacter.codePoint <= BYTE_MAX;
    if (!fitsIntoByte) {
        return Fail();
    }
    result = parsedCharacter.codePoint;
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Tries to parse a sign: either "+" or "-".
|
|
|
|
*
|
|
|
|
* @param result Value of `ParsedSign` will be recorded here,
|
|
|
|
* depending on what sign was encountered.
|
|
|
|
* `SIGN_Missing` value is only possible if we allow sign to be missing.
|
|
|
|
* @param allowMissingSign By default `false` means that parsing will fail
|
|
|
|
* if next character is neither "+" nor "-";
|
|
|
|
* `true` means that parsing will not fail even if there is no sign -
* method will then consume no input and will return `SIGN_Missing`
|
|
|
|
* as a result.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MSign(
    out ParsedSign  result,
    optional bool   allowMissingSign
)
{
    local ParserState initialState;
    if (!Ok()) {
        return self;
    }
    // Remember where we started, so that every attempt begins at
    // the same position
    initialState = GetCurrentState();
    // Attempt #1: minus
    if (MatchS("-").Ok())
    {
        result = SIGN_Minus;
        return self;
    }
    // Attempt #2: plus
    if (RestoreState(initialState).MatchS("+").Ok())
    {
        result = SIGN_Plus;
        return self;
    }
    // No sign: either recover (nothing gets consumed) or
    // stay in the failed state left by the last `MatchS()` call
    if (allowMissingSign)
    {
        result = SIGN_Missing;
        RestoreState(initialState);
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Tries to parse a number prefix that determines a base system for denoting
|
|
|
|
* integer numbers:
|
|
|
|
* 1. `0x` means hexadecimal;
|
|
|
|
* 2. `0b` means binary;
|
|
|
|
* 3. `0o` means octal;
|
|
|
|
* 4. otherwise we use decimal system.
|
|
|
|
*
|
|
|
|
* This parsing method cannot fail.
|
|
|
|
*
|
|
|
|
* Parser consumes appropriate prefix; nothing if decimal system is determined.
|
|
|
|
*
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MBase(out int base)
{
    local ParserState initialState;
    if (!Ok()) {
        return self;
    }
    // Every prefix is tried from the same starting position
    initialState = GetCurrentState();
    // Hexadecimal
    if (MatchS("0x").Ok())
    {
        base = 16;
        return self;
    }
    // Binary
    if (RestoreState(initialState).MatchS("0b").Ok())
    {
        base = 2;
        return self;
    }
    // Octal
    if (RestoreState(initialState).MatchS("0o").Ok())
    {
        base = 8;
        return self;
    }
    // No known prefix: consume nothing and fall back to decimal
    RestoreState(initialState);
    base = 10;
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses signed integer either in a directly given base (`base`) or in an
|
|
|
|
* auto-determined one (based on prefix, @see `MBase()`).
|
|
|
|
*
|
|
|
|
* Integers are expected in form: (+/-)(0x/0b/0o)<sequence of digits>.
|
|
|
|
* Examples: 78, 0o34, -2, 0b0101001, -0x78aC.
|
|
|
|
*
|
|
|
|
* @param result If parsing is successful - parsed value will be
|
|
|
|
* recorded here; if parsing fails - value is undetermined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @param base Base in which function must attempt to parse a number;
|
|
|
|
* Default value (`0`) means function must auto-determine base,
|
|
|
|
* based on the prefix, otherwise must be between 2 and 36.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MInteger(out int result, optional int base)
{
    local ParsedSign numberSign;
    if (!Ok()) {
        return self;
    }
    // Optional sign goes first
    MSign(numberSign, true);
    // `0` means that the base must be deduced from the prefix
    if (base == 0) {
        MBase(base);
    }
    // Digits themselves
    MUnsignedInteger(result, base);
    // Apply the sign we have read
    if (numberSign == SIGN_Minus) {
        result = -result;
    }
    return self;
}
|
|
|
|
|
|
|
|
// Internal function for parsing fractional part (including the dot ".")
|
|
|
|
// of the text representation for floating point number (decimal system only).
|
|
|
|
// Cannot fail, returns `0.0` if it couldn't parse anything.
|
|
|
|
protected final function Parser MFractionalPart(out float result)
{
    local int           digitsValue, digitsAmount;
    local ParserState   beforeDot, afterDot;
    if (!Ok()) {
        return self;
    }
    // Default answer for the "nothing to parse" outcomes
    result = 0.0;
    // Without the dot there is no fractional part: consume nothing
    beforeDot = GetCurrentState();
    if (!MatchS(".").Ok())
    {
        RestoreState(beforeDot);
        return self;
    }
    // Dot without digits after it is accepted (and stays consumed)
    afterDot = GetCurrentState();
    if (!MUnsignedInteger(digitsValue,,, digitsAmount).Ok())
    {
        RestoreState(afterDot);
        return self;
    }
    // Shift digits we have read behind the decimal point
    result = float(digitsValue) * (0.1 ** digitsAmount);
    return self;
}
|
|
|
|
|
|
|
|
// Internal function for parsing exponent part (including the symbol "e")
|
|
|
|
// of the text representation for floating point number (decimal system only).
|
|
|
|
// Can only fail if symbol "e" / "E" is present, but there is no valid
|
|
|
|
// integer right after it (whitespace symbols in-between are forbidden).
|
|
|
|
// Returns `0` if there was no exponent to parse.
|
|
|
|
protected final function Parser MExponentPart(out int result)
{
    local ParserState   checkpoint;
    local ParsedSign    exponentSign;
    if (!Ok()) return self;

    //  Zero `result` up front, so that "no exponent" is always reported
    //  as `0`, regardless of what value caller has passed in
    result = 0;
    //  Is there even an exponential part?
    checkpoint = GetCurrentState();
    if (!MatchS("e", SCASE_INSENSITIVE).Ok())
    {
        //  No "e" / "E" - not an error, simply consume nothing
        RestoreState(checkpoint);
        return self;
    }
    //  If yes - parse it: optional sign immediately followed by
    //  decimal digits. Failure here leaves parser in a failed state.
    MSign(exponentSign, true).MUnsignedInteger(result, 10);
    if (exponentSign == SIGN_Minus) {
        result *= -1;
    }
    return self;
}
|
|
|
|
|
|
|
|
// Internal function for parsing optional suffix of the text representation
|
|
|
|
// for floating point number ("f" or "F").
|
|
|
|
// Cannot fail. Can only consume one Unicode code point,
|
|
|
|
// when it is either "f" or "F".
|
|
|
|
protected final function Parser MFloatSuffix()
{
    local ParserState beforeSuffix;
    if (Ok())
    {
        // Suffix is optional: if it is not there, undo the failed match
        beforeSuffix = GetCurrentState();
        if (!MatchS("f", SCASE_INSENSITIVE).Ok()) {
            RestoreState(beforeSuffix);
        }
    }
    return self;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parses signed floating point number in JSON form + optional "f" / "F"
|
|
|
|
* suffix at the end.
|
|
|
|
*
|
|
|
|
* @param result If parsing is successful - parsed value will be
|
|
|
|
* recorded here; if parsing fails - value is undetermined.
|
|
|
|
* Any passed value is discarded.
|
|
|
|
* @return Returns the caller `Parser`, to allow for function chaining.
|
|
|
|
*/
|
|
|
|
public final function Parser MNumber(out float result)
{
    local ParsedSign    numberSign;
    local float         fractionalPart;
    local int           integerPart, exponentPart;
    if (!Ok()) {
        return self;
    }
    // Read all the components of the number in order:
    // [sign]<integer>[.fraction][e<exponent>][f]
    self.MSign(numberSign, true)
        .MUnsignedInteger(integerPart, 10)
        .MFractionalPart(fractionalPart)
        .MExponentPart(exponentPart)
        .MFloatSuffix();
    // Only assemble the value if every component was parsed successfully
    if (Ok())
    {
        result = float(integerPart) + fractionalPart;
        result = result * (10.0 ** exponentPart);
        if (numberSign == SIGN_Minus) {
            result = -result;
        }
    }
    return self;
}
|
|
|
|
|
|
|
|
defaultproperties
{
    // No (re-)initializations have happened yet
    version = 0
    // Upper bound used when checking whether a code point fits into `byte`
    BYTE_MAX = 255
    // Code points of characters with special meaning for the parser
    CODEPOINT_BACKSLASH = 92    // backslash
    CODEPOINT_USMALL    = 117   // u
    CODEPOINT_ULARGE    = 85    // U
    // Escape sequences: code point of the letter after the backslash (`from`)
    // and code point it is replaced with (`to`)
    escapeCharactersMap(0)=(from=110,to=10) // \n
    escapeCharactersMap(1)=(from=114,to=13) // \r
    escapeCharactersMap(2)=(from=116,to=9)  // \t
    escapeCharactersMap(3)=(from=98,to=8)   // \b
    escapeCharactersMap(4)=(from=102,to=12) // \f
    escapeCharactersMap(5)=(from=118,to=11) // \v
}
|