/**
* Implements a simple `Parser` with built-in functions to parse simple
* UnrealScript's types and support for saving / restoring parser states.
* Copyright 2021-2022 Anton Tarasenko
*------------------------------------------------------------------------------
* This file is part of Acedia.
*
* Acedia is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License, or
* (at your option) any later version.
*
* Acedia is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Acedia. If not, see <https://www.gnu.org/licenses/>.
*/
class Parser extends AcediaObject
dependson(Text)
dependson(UnicodeData);
// Max value of a byte.
// NOTE(review): actual value is presumably assigned in `defaultproperties`,
// which is outside of this part of the file - confirm.
var public int BYTE_MAX;
// Code points of symbols with important meaning in parsing:
// * `CODEPOINT_BACKSLASH` starts an escaped sequence
//   (see `MEscapedSequence()`);
// * `CODEPOINT_USMALL` / `CODEPOINT_ULARGE` are compared against the escaped
//   character to detect 4-digit / 2-digit hexadecimal code point
//   declarations (see `MEscapedSequence()`).
var public int CODEPOINT_BACKSLASH;
var public int CODEPOINT_USMALL;
var public int CODEPOINT_ULARGE;
// Text object with content that this `Parser` is supposed to parse
// (only copies of any `Text` passed to us are stored here).
var private Text content;
// Incremented each time `Parser` is reinitialized with new `content`.
// Every `ParserState` records the version it was created for, so states
// obtained before a re-initialization are recognized as invalid
// (see `IsStateValid()`). This makes `Parser` object completely independent
// from its past after each re-initialization and lets the same object
// be reused, avoiding needless reallocations.
var private int version;
// Describes current state of the `Parser`, instance of this struct
// can be used to revert parser back to this state
// (see `GetCurrentState()` and `RestoreState()`).
struct ParserState
{
// Record to which object (and of what version) this state belongs to.
// This information is used to make sure that we apply this state
// only to same `Parser` (of the same version) that it originated from.
var private AcediaObject ownerObject;
var private int ownerVersion;
// Has parser failed at some point?
var private bool failed;
// Index (inside `Parser`'s content) of the next symbol to be used
// in parsing.
var private int pointer;
};
// State that all parsing methods read and modify.
var private ParserState currentState;
// For convenience `Parser` will store one internal state that designates
// a state that's safe to revert to when some parsing attempt goes wrong.
// It is overwritten by `Confirm()` and on every (re-)initialization.
// @see `Confirm()`, `R()`
var private ParserState confirmedState;
// Describes rules for translating escaped sequences ("\r", "\n", "\t")
// into appropriate code points: `MEscapedSequence()` looks up an entry whose
// `from` field equals the escaped code point and substitutes its `to` field.
// NOTE(review): the element type of this array seems to have been lost
// (an `array<...>` type parameter is missing); it must be a struct with
// integer `from` and `to` fields, presumably declared in `UnicodeData` -
// restore the declaration after confirming the type.
var private const array escapeCharactersMap;
// Used to store a result of a `ParseSign()` function
// (that function is not defined in this part of the file).
// NOTE(review): value meanings below are inferred from their names only:
// no sign was found / "+" was found / "-" was found - confirm against
// `ParseSign()`.
enum ParsedSign
{
SIGN_Missing,
SIGN_Plus,
SIGN_Minus
};
//  TODO: add finalizer
//      Common logic for parser initialization.
//      Uses `source` as is, without copying, so public initialization method
//  must do it itself. Ownership of `source` is taken over by the `Parser`.
//      Does nothing if `source` is `none`.
//      Always returns the caller `Parser`.
private final function Parser _initialize(Text source)
{
    if (source == none) return self;
    //  `content` only ever holds our own private copy of the parsed `Text`
    //  and is never handed out, so the copy from previous initialization
    //  must be released here, otherwise every re-initialization leaks it.
    if (content != none && content != source) {
        content.FreeSelf();
    }
    content = source;
    //  New version invalidates every `ParserState` obtained earlier
    version += 1;
    currentState.ownerObject    = self;
    currentState.ownerVersion   = version;
    currentState.failed         = false;
    currentState.pointer        = 0;
    //  Freshly initialized state is the first "safe" state to revert to
    confirmedState = currentState;
    return self;
}
/**
 *  Prepares caller `Parser` for parsing contents of the given `Text`.
 *
 *  `Parser` stores its own copy of `source`, so later changes to the passed
 *  object do not affect parsing.
 *  Any data from before this call is lost, any checkpoints are invalidated.
 *
 *  @param  source  `Text` object `Parser` will need to parse.
 *      If `none` is passed - this call does nothing and `Parser` keeps
 *      whatever state it had before.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser Initialize(Text source)
{
    local Text sourceCopy;
    if (source != none)
    {
        sourceCopy = source.Copy();
        _initialize(sourceCopy);
    }
    return self;
}
/**
 *  Prepares caller `Parser` for parsing contents of a plain `string`.
 *
 *  Given `string` is converted into a `Text` by `_.text.FromString()` and
 *  that object becomes `Parser`'s content. If the conversion returns `none`,
 *  `Parser` is left as it was before the call.
 *
 *  Any data from before this call is lost, any checkpoints are invalidated.
 *
 *  @param  source  String `Parser` will need to parse.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser InitializeS(string source)
{
    local Text sourceAsText;
    sourceAsText = _.text.FromString(source);
    return _initialize(sourceAsText);
}
/**
 *  Reports whether caller `Parser` is still in a usable (non-failed) state.
 *
 *  `Parser` enters a failed state whenever any parsing call returns without
 *  completing its job. `Parser` in a failed state will automatically fail
 *  any further parsing attempts until it gets reset via `R()` call.
 *
 *  @return `false` if caller `Parser` is in a failed state and
 *      `true` otherwise.
 */
public final function bool Ok()
{
    if (currentState.failed) {
        return false;
    }
    return true;
}
/**
 *  Takes a snapshot of the current state of the caller `Parser`.
 *
 *  As long as caller `Parser` was not reinitialized, returned `ParserState`
 *  structure can be passed to `RestoreState()` to bring this `Parser` back
 *  to the condition it is in right now.
 *
 *  @see `RestoreState()`
 *  @return Copy of the current state of the caller `Parser`.
 */
public final function ParserState GetCurrentState()
{
    local ParserState snapshot;
    snapshot = currentState;
    return snapshot;
}
/**
 *  Takes a snapshot of the state that is currently recorded as confirmed
 *  (the one `R()` would revert to).
 *
 *  As long as caller `Parser` was not reinitialized, returned `ParserState`
 *  structure can be passed to `RestoreState()` to bring this `Parser` back
 *  to that confirmed state.
 *
 *  @see `RestoreState()`, `Confirm()`, `R()`
 *  @return Copy of (currently) last confirmed state of this parser.
 */
public final function ParserState GetConfirmedState()
{
    local ParserState snapshot;
    snapshot = confirmedState;
    return snapshot;
}
/**
 *  Checks if given `stateToCheck` can be applied to the caller `Parser`.
 *
 *  A state is valid when both conditions hold:
 *      1. It records the caller `Parser` as its owner, i.e. it was produced
 *          by `GetCurrentState()` or `GetConfirmedState()` of this object;
 *      2. It records the caller's current version, i.e. caller `Parser` was
 *          not reinitialized since the state was produced.
 *
 *  @param  stateToCheck    `ParserState` to check for validity for
 *      caller `Parser`.
 *  @return `true` if given `stateToCheck` is valid and `false` otherwise.
 */
public final function bool IsStateValid(ParserState stateToCheck)
{
    return (    stateToCheck.ownerObject == self
            &&  stateToCheck.ownerVersion == version);
}
/**
 *  Checks if calling `RestoreState()` for passed state would leave caller
 *  `Parser` in an "Ok" (not failed) state.
 *
 *  This requires the state to be valid for the caller `Parser`
 *  (see `IsStateValid()`) and to have been recorded while `Parser` was
 *  not failed.
 *
 *  @param  stateToCheck    `ParserState` to check.
 *  @return `true` if given `stateToCheck` is valid for the caller `Parser`
 *      and describes a non-failed state; `false` otherwise.
 */
public final function bool IsStateOk(ParserState stateToCheck)
{
    return (IsStateValid(stateToCheck) && !stateToCheck.failed);
}
/**
 *  Reverts caller `Parser` to the state described by `stateToRestore`
 *  (the state `Parser` was in at the moment `stateToRestore` was obtained).
 *
 *  If given `stateToRestore` belongs to a different `Parser` or the owner
 *  `Parser` was reinitialized after the state was obtained, the state is
 *  not applied and caller `Parser` is put into a failed state instead.
 *  Note that caller `Parser` being failed after this call does not prove
 *  that the state was rejected: a valid `stateToRestore` can itself describe
 *  a failed `Parser`.
 *
 *  @param  stateToRestore  `ParserState` that this method will attempt
 *      to set for the caller `Parser`.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser RestoreState(ParserState stateToRestore)
{
    if (IsStateValid(stateToRestore)) {
        currentState = stateToRestore;
    }
    else {
        //  Foreign or outdated state: refuse it and signal the problem
        currentState.failed = true;
    }
    return self;
}
/**
 *  Records current state of the caller `Parser` as "confirmed", so that it
 *  can later be returned to with an `R()` call.
 *
 *  Only non-failed states can be recorded: the call is rejected if (and only
 *  if) caller `Parser` is in a failed state.
 *
 *  `Confirm()` and `R()` are essentially convenience wrappers around
 *  `GetCurrentState()` and `RestoreState()` that use a state variable stored
 *  inside the `Parser` itself.
 *
 *  @return `true` if current state was recorded as confirmed and
 *      `false` otherwise.
 */
public final function bool Confirm()
{
    if (currentState.failed) {
        return false;
    }
    confirmedState = currentState;
    return true;
}
/**
 *  Reverts caller `Parser` to the state recorded by the last successful
 *  `Confirm()` call. If there was no such call since the last
 *  initialization - reverts to the state right after that initialization.
 *
 *  The confirmed state is copied over the current one as is, without
 *  any validity checks. Since `Confirm()` refuses to record failed states,
 *  this also clears the failed status of the `Parser`.
 *
 *  `Confirm()` and `R()` are essentially convenience wrappers around
 *  `GetCurrentState()` and `RestoreState()` that use a state variable stored
 *  inside the `Parser` itself.
 *
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser R()
{
    //  Plain struct copy: pointer and failed flag are restored together
    currentState = confirmedState;
    return self;
}
/**
 *  Moves parsing pointer forward, never past the end of the content.
 *
 *  Pointer can only be moved forward with this method. To go back to
 *  a previous position use `GetCurrentState()` / `RestoreState()`.
 *  Does nothing if `Parser` has no content.
 *
 *  @param  shift   By how many positions to move the pointer.
 *      Values of zero and below are replaced with `1`
 *      (i.e. by default this method shifts pointer by `1` position).
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
protected final function Parser ShiftPointer(optional int shift)
{
    local int newPointer, contentLength;
    if (content != none)
    {
        if (shift < 1) {
            shift = 1;
        }
        contentLength   = content.GetLength();
        newPointer      = currentState.pointer + shift;
        //  Clamp to the position right after the last symbol
        if (newPointer > contentLength) {
            newPointer = contentLength;
        }
        currentState.pointer = newPointer;
    }
    return self;
}
/**
 *  Returns a character from this `Parser`'s content, addressed relative to
 *  the next character that caller `Parser` must handle.
 *  Does not move the parsing pointer.
 *
 *  @param  shift   If `0` (default value) or negative value is passed -
 *      returns the character that caller `Parser` must handle next.
 *      Otherwise looks `shift` characters further, i.e. `1` to return
 *      the character after the next one.
 *  @return Character at a given shift. If resulting position lies outside of
 *      `Parser`'s contents, a character with code point `-1` is returned.
 *      If `Parser` has no content at all, result of
 *      `_.text.GetInvalidCharacter()` is returned.
 *      This means `GetCharacter()` with default (`0`) parameter also returns
 *      invalid character when contents are empty or fully consumed.
 */
protected final function Text.Character GetCharacter(optional int shift)
{
    local int               absoluteAddress;
    local Text.Character    invalidCharacter;
    if (content == none) {
        return _.text.GetInvalidCharacter();
    }
    absoluteAddress = currentState.pointer;
    //  Negative shifts are treated as zero
    if (shift > 0) {
        absoluteAddress += shift;
    }
    if (absoluteAddress >= 0 && absoluteAddress < content.GetLength()) {
        return content.GetRawCharacter(absoluteAddress);
    }
    invalidCharacter.codePoint = -1;
    return invalidCharacter;
}
/**
 *  Puts caller `Parser` into a failed state. Parsing pointer is not changed.
 *
 *  @return Returns the caller `Parser`, which allows parsing functions
 *      to bail out with a single `return Fail();` statement.
 */
public final function Parser Fail()
{
    //  Only the flag is touched, so `R()` / `RestoreState()` can still
    //  recover the `Parser`
    currentState.failed = true;
    return self;
}
/**
 *  Returns amount of code points that caller `Parser` has already consumed,
 *  i.e. current value of the parsing pointer (never below zero).
 *
 *  @return How many code points have already been parsed if caller `Parser`
 *      is in a correct state; otherwise return value is undefined.
 */
public final function int GetParsedLength()
{
    if (currentState.pointer < 0) {
        return 0;
    }
    return currentState.pointer;
}
/**
 *  Returns amount of code points that caller `Parser` has not consumed yet:
 *  length of the content minus parsing pointer (never below zero).
 *  `Parser` without content reports `0`.
 *
 *  @return How many code points are still unparsed if caller `Parser`
 *      is in a correct state; otherwise return value is undefined.
 */
public final function int GetRemainingLength()
{
    local int remainingLength;
    //  Locals start at zero, which is also the answer for a missing content
    if (content != none) {
        remainingLength = content.GetLength() - currentState.pointer;
    }
    if (remainingLength < 0) {
        remainingLength = 0;
    }
    return remainingLength;
}
/**
 *  Checks if caller `Parser` has already consumed all of its content.
 *  Uninitialized `Parser` has no content and, therefore, parsed it all.
 *
 *  Returns `true` iff `GetRemainingLength() == 0`.
 *
 *  @return `true` if caller `Parser` has no more data to parse.
 */
public final function bool HasFinished()
{
    //  `GetRemainingLength()` already accounts for a missing content and
    //  for a pointer that has reached (or passed) the end
    return (GetRemainingLength() == 0);
}
/**
 *  Builds a new text object out of the still unparsed part of caller
 *  `Parser`'s content. Parsing pointer is not moved.
 *
 *  @return Unparsed part of caller `Parser`'s source as `Text`
 *      (a new object produced by `_.text.Empty()`; empty if nothing is left).
 */
public final function Text GetRemainder()
{
    local int           i, remainingLength;
    local MutableText   remainder;
    remainder       = _.text.Empty();
    remainingLength = GetRemainingLength();
    while (i < remainingLength)
    {
        remainder.AppendCharacter(GetCharacter(i));
        i += 1;
    }
    return remainder;
}
/**
 *  Builds a plain `string` out of the still unparsed part of caller
 *  `Parser`'s content. Parsing pointer is not moved.
 *
 *  @return Unparsed part of caller `Parser`'s source as a plain `string`.
 */
public final function string GetRemainderS()
{
    local int       i, remainingLength;
    local string    remainder;
    local TextAPI   api;
    api             = _.text;
    remainingLength = GetRemainingLength();
    while (i < remainingLength)
    {
        remainder = remainder $ api.CharacterToString(GetCharacter(i));
        i += 1;
    }
    return remainder;
}
/**
 *  Auxiliary method for parsing methods that return their result through
 *  a `MutableText` parameter: guarantees that `result` refers to an empty,
 *  usable instance.
 *
 *  If passed instance is `none` or not currently allocated - a new one is
 *  created, otherwise existing one is emptied.
 *
 *  @param  result  `MutableText` instance to empty/create.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
protected final function Parser ResetResultText(out MutableText result)
{
    if (result != none && result.IsAllocated()) {
        result.Clear();
    }
    else {
        result = _.text.Empty();
    }
    return self;
}
/**
 *  Consumes any sequence of whitespace symbols, without returning it.
 *  Starts from where previous parsing function finished.
 *
 *  Can never cause parser to enter failed state. Does nothing (and leaves
 *  `whitespacesAmount` untouched) if `Parser` is already in a failed state.
 *
 *  What symbols exactly are considered whitespace is decided by
 *  `TextAPI.IsWhitespace()` function.
 *
 *  @param  whitespacesAmount   Returns how many whitespace symbols
 *      were skipped. Any given value is discarded.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser Skip(optional out int whitespacesAmount)
{
    local int       skippedAmount;
    local TextAPI   api;
    if (!Ok()) {
        return self;
    }
    api = _.text;
    //  Look ahead without moving the pointer; the loop stops on the first
    //  character that `IsWhitespace()` rejects
    while (api.IsWhitespace(GetCharacter(skippedAmount))) {
        skippedAmount += 1;
    }
    whitespacesAmount = skippedAmount;
    //  `ShiftPointer(0)` would shift by one, so only call it when needed
    if (skippedAmount > 0) {
        ShiftPointer(skippedAmount);
    }
    return self;
}
/**
 *  Function that tries to match given data in `Parser`'s content,
 *  starting from where previous parsing function finished.
 *  On success matched characters are consumed; on mismatch `Parser` enters
 *  a failed state and parsing pointer is left where it was.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *  Matching an empty `word` always succeeds and consumes nothing.
 *
 *  @param  word            `Text` that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *      Passing `none` puts `Parser` into a failed state.
 *  @param  caseSensitivity Specifies whether `Match()` should try and
 *      ignore the difference in case, where applicable.
 *      By default it does not ignore case difference.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser Match(
    Text                            word,
    optional Text.CaseSensitivity   caseSensitivity)
{
    local int       i;
    local int       wordLength;
    local TextAPI   api;
    if (word == none)                       return Fail();
    if (!Ok())                              return self;
    wordLength = word.GetLength();
    if (wordLength > GetRemainingLength())  return Fail();
    api = _.text;
    for (i = 0; i < wordLength; i += 1)
    {
        if (!api.AreEqual(  word.GetCharacter(i), GetCharacter(i),
                            caseSensitivity))
        {
            return Fail();
        }
    }
    //  `ShiftPointer()` replaces non-positive shifts with `1`, so calling it
    //  for an empty `word` would wrongly consume one character
    if (wordLength > 0) {
        ShiftPointer(wordLength);
    }
    return self;
}
/**
 *  Function that tries to match given `string`, starting from where
 *  previous parsing function finished.
 *
 *  Converts `word` into a temporary `Text`, delegates to `Match()` and then
 *  releases the temporary object.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  word            `string` that must be matched to the `Parser`'s
 *      contents, starting from where previous parsing function finished.
 *  @param  caseSensitivity Specifies whether `Match()` should try and
 *      ignore the difference in case, where applicable.
 *      By default it does not ignore case difference.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MatchS(
    string                          word,
    optional Text.CaseSensitivity   caseSensitivity)
{
    local Text wordAsText;
    wordAsText = _.text.FromString(word);
    Match(wordAsText, caseSensitivity);
    //  Temporary object is ours alone, release it regardless of the outcome
    wordAsText.FreeSelf();
    return self;
}
/**
 *  Parses unsigned integers in any base from 2 to 36.
 *
 *  This parsing can fail, putting `Parser` into a failed state.
 *  Does nothing (apart from resetting `out` parameters) if caller `Parser`
 *  was already in a failed state.
 *  Values too large for `int` are truncated at `MaxInt`
 *  (see `SafeIntegerCombination()`).
 *
 *  @param  result          If parsing is successful, this value will contain
 *      parsed integer, otherwise value is undefined.
 *      Any passed value is discarded.
 *  @param  base            Base, in which integer in question is recorded.
 *      `0` (default) means base `10`; any other value outside of `[2; 36]`
 *      causes failure.
 *  @param  numberLength    If this parameter is less or equal to zero,
 *      function will stop parsing the moment it can't recognize a character as
 *      belonging to a number in a given base.
 *      It will only fail if it couldn't parse a single character;
 *          If this parameter is set to be positive (`> 0`), function will
 *      attempt to use exactly `numberLength` character for parsing and will
 *      fail if they would not constitute a valid number.
 *  @param  consumedCodePoints  Amount of code point used (consumed) to parse
 *      this number; undefined, if parsing is unsuccessful.
 *      Any passed value is discarded.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MUnsignedInteger(
    out int             result,
    optional int        base,
    optional int        numberLength,
    optional out int    consumedCodePoints)
{
    local bool  parsingFixedLength;
    local int   nextPosition;
    //  Passed values are always discarded, even when we bail out early
    result              = 0;
    consumedCodePoints  = 0;
    //  Failed `Parser` must not consume any more input
    if (!Ok()) return self;
    numberLength = Max(0, numberLength);
    parsingFixedLength = (numberLength != 0);
    if (base == 0) {
        base = 10;
    }
    else if (base < 2 || base > 36) {
        return Fail();
    }
    while (!HasFinished())
    {
        if (parsingFixedLength && consumedCodePoints >= numberLength) break;
        //  Negative value means the character is not a digit in this base
        nextPosition = _.text.CharacterToInt(GetCharacter(), base);
        if (nextPosition < 0) break;
        //  This is `result = result * base + nextPosition`,
        //  but protected from integer overflow
        result = SafeIntegerCombination(result, base, nextPosition);
        consumedCodePoints += 1;
        ShiftPointer();
    }
    if (    (parsingFixedLength && consumedCodePoints != numberLength)
        ||  consumedCodePoints < 1)
    {
        return Fail();
    }
    return self;
}
/**
 *  Auxiliary method that appends another numeric position `addition` to the
 *  given `baseValue` in base `multiplier`, effectively computing
 *  `baseValue * multiplier + addition`, but saturating at
 *  `MaxInt = 0x7fffffff` instead of overflowing.
 *
 *  Intended for non-negative arguments and a positive `multiplier`
 *  (the only caller in this class, `MUnsignedInteger()`, passes
 *  a base in `[2; 36]`).
 *
 *  @param  baseValue   Initial value to modify by multiplication and addition.
 *  @param  multiplier  Value by which `baseValue` is multiplied first.
 *  @param  addition    Value to add to the result of multiplication of
 *      `baseValue` and `multiplier`.
 *  @return `baseValue * multiplier + addition` truncated at
 *      `MaxInt = 0x7fffffff`, preventing integer overflow.
 */
protected final function int SafeIntegerCombination(
    int baseValue,
    int multiplier,
    int addition)
{
    local int product;
    //  Multiplication would overflow
    if (baseValue > MaxInt / multiplier) {
        return MaxInt;
    }
    product = baseValue * multiplier;
    //  Addition would overflow
    if (addition > MaxInt - product) {
        return MaxInt;
    }
    return product + addition;
}
/**
 *  Parses escaped sequence of the type that is usually used in
 *  string literals: backslash "\", followed by any character
 *  (called escaped character later) or, in special cases, several characters.
 *      For most characters escaped sequence is resolved into
 *  the escaped character itself.
 *
 *      Escaped characters listed in `escapeCharactersMap`
 *  (per the original documentation: \n, \r, \t, \b, \f, \v)
 *  are translated into a different code point corresponding to
 *  a control symbol, normally denoted by these sequences.
 *
 *      A Unicode code point can also be directly entered with either of
 *  the two commands:
 *      \u0056
 *      \U56
 *  `\u` must be followed by exactly 4 hexadecimal digits (two-byte code
 *  point), while `\U` must be followed by exactly 2 hexadecimal digits
 *  (one-byte code point), which is more compact. If the digits are missing,
 *  `Parser` enters a failed state.
 *
 *  @param  denotedCharacter    If parsing is successful, parameter will
 *      contain the character denoted by a parsed escaped sequence;
 *      If parsing is unsuccessful, value is undefined.
 *      Any passed value is discarded.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MEscapedSequence(
    out Text.Character denotedCharacter)
{
    local int i;
    local int escapedCodePoint;
    if (!Ok()) {
        return self;
    }
    //  Need at least two characters to parse escaped sequence
    if (GetRemainingLength() < 2) {
        return Fail();
    }
    if (GetCharacter().codePoint != CODEPOINT_BACKSLASH) {
        return Fail();
    }
    denotedCharacter = GetCharacter(1);
    ShiftPointer(2);
    escapedCodePoint = denotedCharacter.codePoint;
    //  Escaped character denotes some special code point
    while (i < escapeCharactersMap.length)
    {
        if (escapeCharactersMap[i].from == escapedCodePoint)
        {
            denotedCharacter.codePoint = escapeCharactersMap[i].to;
            return self;
        }
        i += 1;
    }
    //  Escaped character denotes declaration of arbitrary Unicode code point
    if (escapedCodePoint == CODEPOINT_USMALL) {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 4);
    }
    else if (escapedCodePoint == CODEPOINT_ULARGE) {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 2);
    }
    return self;
}
/**
 *  Attempts to parse a "name": a sequence of characters that:
 *      1. Contains only digits and latin characters
 *          (as decided by `TextAPI.IsAlpha()` / `TextAPI.IsDigit()`);
 *      2. Starts with a latin character.
 *  These restrictions help to avoid possible issues that arise from having
 *  different code pages. For example, only "names" are considered
 *  to be valid aliases.
 *
 *  Fails if there is no input left or if the next character is not
 *  a letter; otherwise consumes the longest possible "name".
 *
 *  @param  result  If parsing is successful, this `MutableText` will contain
 *      the contents of the matched "name", if parsing has failed, its value
 *      is undefined. Any passed contents are simply discarded.
 *      If passed `MutableText` equals to `none`, new instance will be
 *      automatically allocated. This will be done regardless of whether
 *      parsing fails.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MName(out MutableText result)
{
    local TextAPI           api;
    local Text.Character    nextCharacter;
    ResetResultText(result);
    if (!Ok())          return self;
    if (HasFinished())  return Fail();
    api             = _.text;
    nextCharacter   = GetCharacter();
    //  First character is held to a stricter rule: letters only
    if (!api.IsAlpha(nextCharacter)) {
        return Fail();
    }
    //  Current `nextCharacter` is always acceptable at the top of the loop
    do
    {
        result.AppendCharacter(nextCharacter);
        ShiftPointer();
        nextCharacter = GetCharacter();
    } until (!api.IsAlpha(nextCharacter) && !api.IsDigit(nextCharacter));
    return self;
}
/**
 *  Attempts to parse a "name": a sequence of characters that:
 *      1. Contains only digits and latin characters;
 *      2. Starts with a latin character.
 *  `string` version of `MName()`: parses into a temporary `MutableText` and
 *  converts it with `ToString()`.
 *
 *  Does nothing if caller `Parser` was in failed state.
 *
 *  @param  result  If parsing is successful, this `string` will contain the
 *      contents of the matched "name"; if parsing has failed,
 *      it is left unchanged.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MNameS(out string result)
{
    local MutableText parsedName;
    if (!Ok()) {
        return self;
    }
    parsedName = _.text.Empty();
    MName(parsedName);
    if (Ok()) {
        result = parsedName.ToString();
    }
    parsedName.FreeSelf();
    return self;
}
/**
 *  Attempts to parse a string literal: a string enclosed in quotation marks
 *  (per the original documentation: ", ' or `; the actual check is done by
 *  `TextAPI.IsQuotationMark()`).
 *
 *  String literals can contain escaped sequences; every backslash MUST start
 *  a valid one.
 *  String literals MUST end with closing quotation mark equal to
 *  the opening one, otherwise parsing fails.
 *  @see `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this `MutableText` will contain
 *      the contents of string literal with resolved escaped sequences;
 *      if parsing has failed, its value is undefined.
 *      Any passed contents are simply discarded.
 *      If passed `MutableText` equals to `none`, new instance will be
 *      automatically allocated. This will be done regardless of whether
 *      parsing fails.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MStringLiteral(out MutableText result)
{
    local TextAPI           api;
    local Text.Character    nextCharacter;
    local Text.Character    usedQuotationMark;
    local Text.Character    escapedCharacter;
    ResetResultText(result);
    if (!Ok()) {
        return self;
    }
    api                 = _.text;
    usedQuotationMark   = GetCharacter();
    if (!api.IsQuotationMark(usedQuotationMark)) {
        return Fail();
    }
    //  Skip opening quotation mark
    ShiftPointer();
    while (!HasFinished())
    {
        nextCharacter = GetCharacter();
        if (api.AreEqual(nextCharacter, usedQuotationMark))
        {
            //  Closing quote: consume it and report success
            ShiftPointer();
            return self;
        }
        else if (api.IsCodePoint(nextCharacter, CODEPOINT_BACKSLASH))
        {
            //  Backslash MUST mean valid escape sequence
            MEscapedSequence(escapedCharacter);
            if (!Ok()) {
                return Fail();
            }
            result.AppendCharacter(escapedCharacter);
        }
        else
        {
            //  Any other code point is copied as is
            result.AppendCharacter(nextCharacter);
            ShiftPointer();
        }
    }
    //  Content ended without a closing quote.
    return Fail();
}
/**
 *  Attempts to parse a string literal: a string enclosed in quotation marks.
 *  `string` version of `MStringLiteral()`: parses into a temporary
 *  `MutableText` and converts it with `ToString()`.
 *
 *  String literals can contain escaped sequences.
 *  String literals MUST end with closing quotation mark.
 *  Does nothing if caller `Parser` was in failed state.
 *  @see `MStringLiteral()`, `MEscapedSequence()`
 *
 *  @param  result  If parsing is successful, this `string` will contain the
 *      contents of string literal with resolved escaped sequences;
 *      if parsing has failed, it is left unchanged.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MStringLiteralS(out string result)
{
    local MutableText parsedLiteral;
    if (!Ok()) {
        return self;
    }
    parsedLiteral = _.text.Empty();
    MStringLiteral(parsedLiteral);
    if (Ok()) {
        result = parsedLiteral.ToString();
    }
    parsedLiteral.FreeSelf();
    return self;
}
/**
 *  Consumes everything until it finds one of the breaking symbols:
 *      1. a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol
 *          (@see `TextAPI.IsQuotationMark()`).
 *  The breaking symbol itself is not consumed.
 *  This method cannot fail; reaching the end of input also stops it.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `MutableText`. If passed `MutableText`
 *      equals to `none`, new instance will be automatically allocated.
 *      This is done even if `Parser` is already in a failed state.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      a character that `TextAPI.AreEqual()` considers equal to this one
 *      (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol.
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MUntil(
    out MutableText         result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local TextAPI           api;
    local Text.Character    nextCharacter;
    ResetResultText(result);
    if (!Ok()) {
        return self;
    }
    api = _.text;
    while (!HasFinished())
    {
        nextCharacter = GetCharacter();
        if (    api.AreEqual(nextCharacter, characterBreak)
            ||  (whitespacesBreak && api.IsWhitespace(nextCharacter))
            ||  (quotesBreak && api.IsQuotationMark(nextCharacter)))
        {
            break;
        }
        result.AppendCharacter(nextCharacter);
        ShiftPointer();
    }
    return self;
}
/**
 *  Consumes everything until it finds one of the breaking symbols:
 *      1. a specified character (`characterBreak`);
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol
 *          (@see `TextAPI.IsQuotationMark()`).
 *  `string` version of `MUntil()`: parses into a temporary `MutableText` and
 *  converts it with `ToString()`.
 *  This method cannot fail. Does nothing if caller `Parser` was already in
 *  a failed state.
 *
 *  @param  result              Any content before one of the break symbols
 *      will be recorded into this `string`.
 *  @param  characterBreak      Method will stop parsing upon encountering
 *      this character (it will not be included in the `result`).
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol.
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol.
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MUntilS(
    out string              result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak)
{
    local MutableText parsedContent;
    if (!Ok()) {
        return self;
    }
    parsedContent = _.text.Empty();
    MUntil(parsedContent, characterBreak, whitespacesBreak, quotesBreak);
    result = parsedContent.ToString();
    parsedContent.FreeSelf();
    return self;
}
/**
 *  Matches everything until it finds one of the breaking sequences:
 *      1. Any of the specified `separators`;
 *      2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
 *      3. (optionally) quotation symbol
 *          (@see `TextAPI.IsQuotationMark()`).
 *  Parsing stops at the earliest position where one of these begins;
 *  the breaking sequence itself is not consumed.
 *  This method cannot fail; reaching the end of input also stops it.
 *
 *  @param  result              Any content before one of the breaking
 *      sequences will be recorded into this `MutableText`. If passed
 *      `MutableText` equals to `none`, new instance will be automatically
 *      allocated. This will be done regardless of whether parsing fails.
 *  @param  separators          Method will stop parsing upon encountering any
 *      of these `Text`s (but they will not be included in the `result`).
 *      `none` and empty separators are ignored.
 *  @param  whitespacesBreak    `true` if you want to also treat any
 *      whitespace character as a break symbol
 *      (@see `TextAPI.IsWhitespace()` for what symbols are
 *      considered whitespaces).
 *  @param  quotesBreak         `true` if you want to also treat any
 *      quotation mark character as a break symbol
 *      (@see `TextAPI.IsQuotationMark()` for what symbols are
 *      considered quotation marks).
 *  @return Returns the caller `Parser`, to allow for function chaining.
 */
public final function Parser MUntilMany(
    out MutableText result,
    array<Text>     separators,
    optional bool   whitespacesBreak,
    optional bool   quotesBreak)
{
    local bool              foundEnd;
    local int               i, pointerShift, remainingLength;
    local TextAPI           api;
    local Text.Character    nextCharacter;
    ResetResultText(result);
    if (!Ok()) return self;
    api             = _.text;
    //  Pointer is not moved while we are looking ahead, so this is constant
    remainingLength = GetRemainingLength();
    while (pointerShift < remainingLength)
    {
        nextCharacter = GetCharacter(pointerShift);
        if (whitespacesBreak && api.IsWhitespace(nextCharacter))   break;
        if (quotesBreak && api.IsQuotationMark(nextCharacter))     break;
        //  Every position is tested against every separator from scratch.
        //  Tracking partial matches with per-separator counters (as was done
        //  before) loses matches that overlap a failed partial match,
        //  e.g. separator "ab" inside "aab".
        for (i = 0; i < separators.length; i += 1)
        {
            if (_isSeparatorAhead(separators[i], pointerShift))
            {
                foundEnd = true;
                break;
            }
        }
        if (foundEnd) {
            break;
        }
        pointerShift += 1;
    }
    //  Now actually consume everything before the breaking sequence
    for (i = 0; i < pointerShift; i += 1)
    {
        result.AppendCharacter(GetCharacter());
        ShiftPointer();
    }
    return self;
}

//  Checks whether `separator` is fully present in `Parser`'s content,
//  starting `shift` characters after the current parsing pointer.
//  `none` and empty separators never match.
private final function bool _isSeparatorAhead(Text separator, int shift)
{
    local int       i, separatorLength;
    local TextAPI   api;
    if (separator == none)                                  return false;
    separatorLength = separator.GetLength();
    if (separatorLength <= 0)                               return false;
    if (shift + separatorLength > GetRemainingLength())     return false;
    api = _.text;
    for (i = 0; i < separatorLength; i += 1)
    {
        if (!api.AreEqual(  GetCharacter(shift + i),
                            separator.GetCharacter(i)))
        {
            return false;
        }
    }
    return true;
}
/**
* Matches everything until it finds one of the breaking sequences:
* 1. Any of the specified `separators`.
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
* This method cannot fail.
*
* @param result Any content before one of the breaking sequences
* will be recorded into this `MutableText`. If passed `MutableText` equals
* to `none`, new instance will be automatically allocated.
* @param separators Method will stop parsing upon encountering any
* of these `string`s (but they won't not be included in the `result`).
* @param whitespacesBreak `true` if you want to also treat any
* whitespace character as a break symbol
* (@see `TextAPI.IsWhitespace()` for what symbols are
* considered whitespaces).
* @param quotesBreak `true` if you want to also treat any
* quotation mark character as a break symbol
* (@see `TextAPI.IsQuotation()` for what symbols are
* considered quotation marks).
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MUntilManyS(
out string result,
array separators,
optional bool whitespacesBreak,
optional bool quotesBreak)
{
    local MutableText parsedText;

    // A parser that has already failed does nothing and leaves `result` as is
    if (Ok())
    {
        // Let the `MutableText` version do the parsing into a temporary object
        parsedText = _.text.Empty();
        MUntilMany(parsedText, separators, whitespacesBreak, quotesBreak);
        // Hand the parsed contents over as a plain `string`...
        result = parsedText.ToString();
        // ...and release the temporary object
        parsedText.FreeSelf();
    }
    return self;
}
/**
* Parses a string as either "simple" or "quoted".
* Not being able to read any symbols is not considered a failure.
*
* Reading empty string (either due to lack of further data or
* instantly encountering a break symbol) is not considered a failure.
*
* Quoted string starts with quotation mark and ends either
* at the corresponding closing (un-escaped) mark
* or when `Parser`'s input has been fully consumed.
* If string started with a quotation mark, this method will act exactly
* like `MStringLiteral()`.
*
* @param result If parsing is successful - string's contents will be
* recorded here; if parsing has failed - value is undefined.
* Any passed value is discarded.
* If passed `MutableText` equals to `none`, new instance will be
* automatically allocated.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MString(out MutableText result)
{
    local bool startsWithQuote;

    if (!Ok()) {
        return self;
    }
    // Current character decides which form of the string we are reading
    startsWithQuote = _.text.IsQuotationMark(GetCharacter());
    if (!startsWithQuote)
    {
        // "Simple" string: no explicit separator, both optional break flags
        // are enabled (presumably whitespace and quotation mark breaks,
        // same as for `MUntilMany()` - declaration is not visible here)
        MUntil(result,, true, true);
        return self;
    }
    // "Quoted" string: fully delegate to string literal parsing
    MStringLiteral(result);
    return self;
}
/**
* Parses a string as either "simple" or "quoted".
* Not being able to read any symbols is not considered a failure.
*
* Reading empty string (either due to lack of further data or
* instantly encountering a break symbol) is not considered a failure.
*
* Quoted string starts with quotation mark and ends either
* at the corresponding closing (un-escaped) mark
* or when `Parser`'s input has been fully consumed.
* If string started with a quotation mark, this method will act exactly
* like `MStringLiteral()`.
*
* @param result If parsing is successful - string's contents will be
* recorded here; if parsing has failed - value is undefined.
* Any passed value is discarded.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MStringS(out string result)
{
    local MutableText parsedText;

    // A parser that has already failed does nothing and leaves `result` as is
    if (Ok())
    {
        // Parse into a temporary `MutableText` with `MString()`
        parsedText = _.text.Empty();
        MString(parsedText);
        // Convert what was parsed into a plain `string`,
        // then release the temporary object
        result = parsedText.ToString();
        parsedText.FreeSelf();
    }
    return self;
}
/**
* Matches a (possibly empty) sequence of whitespace symbols.
*
* Cannot fail (not being able to read any input is not considered a failure).
*
* @param result If parsing was successful - whitespaces will be recorded
* into this `MutableText`, otherwise - undefined.
* Any passed value is discarded.
* If passed `MutableText` equals to `none`, new instance will be
* automatically allocated.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MWhitespaces(out MutableText result)
{
    local TextAPI textTools;
    local Text.Character candidate;

    if (!Ok()) {
        return self;
    }
    textTools = _.text;
    // Make sure we are recording into an empty `MutableText`:
    // reuse the passed one if there is any, otherwise allocate a new one
    if (result != none) {
        result.Clear();
    }
    else {
        result = textTools.Empty();
    }
    // Consume characters one-by-one for as long as they are whitespaces
    while (!HasFinished())
    {
        candidate = GetCharacter();
        if (textTools.IsWhitespace(candidate))
        {
            result.AppendCharacter(candidate);
            ShiftPointer();
            continue;
        }
        // First non-whitespace character stays unconsumed
        break;
    }
    return self;
}
/**
* Matches a (possibly empty) sequence of whitespace symbols.
*
* Cannot fail (not being able to read any input is not considered a failure).
*
* @param result If parsing was successful - whitespaces will be
* recorded here, otherwise - undefined.
* Any passed value is discarded.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MWhitespacesS(out string result)
{
    local MutableText parsedText;

    // A parser that has already failed does nothing and leaves `result` as is
    if (Ok())
    {
        // Collect whitespaces inside a temporary `MutableText`
        parsedText = _.text.Empty();
        MWhitespaces(parsedText);
        // Convert them into a plain `string`, then release the temporary object
        result = parsedText.ToString();
        parsedText.FreeSelf();
    }
    return self;
}
/**
* Parses next code point as itself.
*
* Can only fail if caller `Parser` has already exhausted all available data.
*
* @param result If parsing was successful - next Unicode code point,
* otherwise - value is undefined.
* Any passed value is discarded.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MCharacter(out Text.Character result)
{
    if (!Ok()) {
        return self;
    }
    if (!HasFinished())
    {
        // There is still input left: take one character and step over it
        result = GetCharacter();
        ShiftPointer();
        return self;
    }
    // Nothing left to read
    return Fail();
}
/**
* Parses next code point as a byte.
* Can fail if caller `Parser` has already exhausted all available data or
* next Unicode code point cannot fit into the `byte` value range.
*
* @param result If parsing was successful - next Unicode code point as
* a byte, otherwise - value is undefined.
* Any passed value is discarded.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MByte(out byte result)
{
    local Text.Character parsedCharacter;

    if (!Ok()) {
        return self;
    }
    // No character to read means no byte to return
    if (!MCharacter(parsedCharacter).Ok()) {
        return Fail();
    }
    // Only accept code points inside the [0; BYTE_MAX] range
    if (    parsedCharacter.codePoint >= 0
        &&  parsedCharacter.codePoint <= BYTE_MAX)
    {
        result = parsedCharacter.codePoint;
        return self;
    }
    return Fail();
}
/**
* Tries to parse a sign: either "+" or "-".
*
* @param result Value of `ParsedSign` will be recorded here,
* depending on what sign was encountered.
* `SIGN_Missing` value is only possible if we allow sign to be missing.
* @param allowMissingSign By default `false` means that parsing will fail
* if next character is neither "+" nor "-";
* `true` means that parsing will not fail even if there is no sign -
* method will then consume no input and will return `SIGN_Missing`
* as a result.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MSign(
out ParsedSign result,
optional bool allowMissingSign
)
{
    local ParserState initialState;

    if (!Ok()) {
        return self;
    }
    // Remember where we started to be able to retry / undo matching
    initialState = GetCurrentState();
    // Try "-" first
    if (MatchS("-").Ok())
    {
        result = SIGN_Minus;
        return self;
    }
    // Rewind and try "+"
    if (RestoreState(initialState).MatchS("+").Ok())
    {
        result = SIGN_Plus;
        return self;
    }
    // No sign was found: this is only fine if caller has allowed it,
    // in which case we rewind once more to undo the failed match.
    // Otherwise parser is left as the failed "+" match attempt left it
    // and `result` is not changed.
    if (allowMissingSign)
    {
        result = SIGN_Missing;
        RestoreState(initialState);
    }
    return self;
}
/**
* Tries to parse a number prefix that determines a base system for denoting
* integer numbers:
* 1. `0x` means hexadecimal;
* 2. `0b` means binary;
* 3. `0o` means octal;
* 4. otherwise we use decimal system.
*
* This parsing method cannot fail.
*
* Parser consumes appropriate prefix; nothing if decimal system is determined.
*
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MBase(out int base)
{
    local ParserState initialState;

    if (!Ok()) {
        return self;
    }
    // Each prefix is tried from the same starting position
    initialState = GetCurrentState();
    // Hexadecimal
    if (MatchS("0x").Ok())
    {
        base = 16;
        return self;
    }
    // Binary
    if (RestoreState(initialState).MatchS("0b").Ok())
    {
        base = 2;
        return self;
    }
    // Octal
    if (RestoreState(initialState).MatchS("0o").Ok())
    {
        base = 8;
        return self;
    }
    // No known prefix: undo the last failed attempt and
    // fall back to decimal system without consuming anything
    RestoreState(initialState);
    base = 10;
    return self;
}
/**
* Parses signed integer either in a directly given base (`base`) or in an
* auto-determined one (based on prefix, @see `MBase()`).
*
* Integers are expected in form: (+/-)(0x/0b/0o)<digits>, where the sign is
* optional and the base prefix is only recognized when `base` is `0`.
* Examples: 78, 0o34, -2, 0b0101001, -0x78aC.
*
* @param result If parsing is successful - parsed value will be
* recorded here; if parsing fails - value is undetermined.
* Any passed value is discarded.
* @param base Base in which function must attempt to parse a number;
* Default value (`0`) means function must auto-determine base,
* based on the prefix, otherwise must be between 2 and 36.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MInteger(out int result, optional int base)
{
    local ParsedSign parsedSign;

    if (!Ok()) {
        return self;
    }
    // Sign is optional
    MSign(parsedSign, true);
    // Zero `base` means that it must be determined from the prefix
    if (base == 0) {
        MBase(base);
    }
    // Read absolute value and then apply the sign
    MUnsignedInteger(result, base);
    if (parsedSign == SIGN_Minus) {
        result = -result;
    }
    return self;
}
// Internal function for parsing fractional part (including the dot ".")
// of the text representation for floating point number (decimal system only).
// Cannot fail, returns `0.0` if it couldn't parse anything.
protected final function Parser MFractionalPart(out float result)
{
    local ParserState rollbackPoint;
    local int digitsAsInteger;
    local int digitsCount;

    if (!Ok()) {
        return self;
    }
    // This is what caller gets unless we manage to read some digits
    result = 0.0;
    rollbackPoint = GetCurrentState();
    if (MatchS(".").Ok())
    {
        // From now on we only roll back to the position right after the dot
        rollbackPoint = GetCurrentState();
        if (MUnsignedInteger(digitsAsInteger,,, digitsCount).Ok())
        {
            // Digits were read as an integer, so shift them behind the dot
            result = float(digitsAsInteger) * (0.1 ** digitsCount);
        }
        else {
            RestoreState(rollbackPoint);
        }
    }
    else
    {
        // No dot - no fractional part, undo the failed match
        RestoreState(rollbackPoint);
    }
    return self;
}
// Internal function for parsing exponent part (including the symbol "e")
// of the text representation for floating point number (decimal system only).
// Can only fail if symbol "e" / "E" is present, but there is no valid
// integer right after it (whitespace symbols in-between are forbidden).
// Returns `0` if there was no exponent to parse.
protected final function Parser MExponentPart(out int result)
{
    local ParserState checkpoint;
    local ParsedSign exponentSign;

    // Parser that has already failed does nothing
    if (!Ok()) return self;
    // Reset the output up front, so that caller receives `0` when there is
    // no exponent at all, instead of whatever value was passed in `result`
    result = 0;
    // Is there even an exponential part?
    checkpoint = GetCurrentState();
    if (!MatchS("e", SCASE_INSENSITIVE).Ok())
    {
        // No "e" / "E": undo the failed match and report zero exponent
        RestoreState(checkpoint);
        return self;
    }
    // If yes - parse it as an optional sign followed by a decimal integer;
    // if the integer cannot be read, parser is left in a failed state
    MSign(exponentSign, true).MUnsignedInteger(result, 10);
    if (exponentSign == SIGN_Minus) {
        result *= -1;
    }
    return self;
}
// Internal function for parsing optional suffix of the text representation
// for floating point number ("f" or "F").
// Cannot fail. Can only consume one Unicode code point,
// when it is either "f" or "F".
protected final function Parser MFloatSuffix()
{
    local ParserState beforeSuffix;

    if (Ok())
    {
        beforeSuffix = GetCurrentState();
        // Suffix is optional: if it is missing, simply undo the failed match
        if (!MatchS("f", SCASE_INSENSITIVE).Ok()) {
            RestoreState(beforeSuffix);
        }
    }
    return self;
}
/**
* Parses signed floating point number in JSON form + optional "f" / "F"
* suffix at the end.
*
* @param result If parsing is successful - parsed value will be
* recorded here; if parsing fails - value is undetermined.
* Any passed value is discarded.
* @return Returns the caller `Parser`, to allow for function chaining.
*/
public final function Parser MNumber(out float result)
{
    local ParsedSign numberSign;
    local int wholeValue, exponentValue;
    local float fractionValue;

    if (!Ok()) {
        return self;
    }
    // Read all the components of the number in order:
    // sign, integer part, fractional part, exponent and "f" / "F" suffix
    MSign(numberSign, true)
        .MUnsignedInteger(wholeValue, 10)
        .MFractionalPart(fractionValue)
        .MExponentPart(exponentValue)
        .MFloatSuffix();
    // Only assemble the result if every component was parsed successfully
    if (Ok())
    {
        result = float(wholeValue) + fractionValue;
        result *= 10.0 ** exponentValue;
        if (numberSign == SIGN_Minus) {
            result = -result;
        }
    }
    return self;
}
defaultproperties
{
// Start with no initializations done
version = 0
// Max value of a byte
BYTE_MAX = 255
// Code points of symbols with important meaning in parsing
CODEPOINT_BACKSLASH = 92 // \
CODEPOINT_USMALL = 117 // u
CODEPOINT_ULARGE = 85 // U
// Supported escape sequences. Judging by the trailing comments, `from` is
// the code point of the symbol written after the backslash and `to` is
// the code point that the whole sequence stands for
// (NOTE: `escapeCharactersMap` is declared outside of this part of the file -
// confirm against its declaration).
escapeCharactersMap(0)=(from=110,to=10) // \n
escapeCharactersMap(1)=(from=114,to=13) // \r
escapeCharactersMap(2)=(from=116,to=9) // \t
escapeCharactersMap(3)=(from=98,to=8) // \b
escapeCharactersMap(4)=(from=102,to=12) // \f
escapeCharactersMap(5)=(from=118,to=11) // \v
}