UnrealScript library and basis for all Acedia Framework mods
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

1312 lines
43 KiB

/**
* Implements a simple `Parser` with built-in functions to parse simple
* UnrealScript's types and support for saving / restoring parser states.
* Copyright 2020 Anton Tarasenko
*------------------------------------------------------------------------------
* This file is part of Acedia.
*
* Acedia is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License, or
* (at your option) any later version.
*
* Acedia is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Acedia. If not, see <https://www.gnu.org/licenses/>.
*/
class Parser extends AcediaObject
dependson(Text)
dependson(UnicodeData);
// Largest code point value that `MByte()` accepts as a valid `byte`.
// NOTE(review): values of these four variables are not assigned in
// the visible part of the file - presumably they are set in
// `defaultproperties`; confirm.
var public int BYTE_MAX;
// Code point that `MEscapedSequence()` and `MStringLiteralRaw()` treat as
// the start of an escaped sequence.
var public int CODEPOINT_BACKSLASH;
// Escaped character that makes `MEscapedSequence()` read a code point
// written as exactly 2 hexadecimal digits.
var public int CODEPOINT_USMALL;
// Escaped character that makes `MEscapedSequence()` read a code point
// written as exactly 4 hexadecimal digits.
var public int CODEPOINT_ULARGE;
// The sequence of characters that this `Parser` is supposed to parse.
var private array<Text.Character> content;
// Incremented each time `Parser` is reinitialized with new `content`.
// Can be used to make `Parser` object completely independent from
// its past, necessary since garbage collection is extra expensive in UE2
// and we want to reuse created objects as much as possible.
var private int version;
// Describes current state of the `Parser`, instance of this struct
// can be used to revert parser back to this state.
struct ParserState
{
// Records which object (and of what version) this state belongs to.
// This information is used by `IsStateValid()` to make sure that a state
// is only applied to the same `Parser` (of the same version) that
// it originated from.
var private AcediaObject ownerObject;
var private int ownerVersion;
// Has parser failed at some point?
// While this flag is set, `Ok()` returns `false`.
var private bool failed;
// Index (inside `Parser`'s `content`) of the next symbol to be parsed.
var private int pointer;
};
// State that all parsing methods read and modify: holds parsing position
// (`pointer`) and the `failed` flag of this `Parser`.
var private ParserState currentState;
// For convenience `Parser` will store one internal state that designates
// a state that's safe to revert to when some parsing attempt goes wrong.
// It is overwritten by `Confirm()` and copied back into `currentState`
// by `R()`.
// @see `Confirm()`, `R()`
var private ParserState confirmedState;
// Describes rules for translating escaped sequences ("\r", "\n", "\t")
// into appropriate code points: `MEscapedSequence()` replaces an escaped
// character whose code point equals `from` with code point `to`.
var private const array<UnicodeData.CodePointMapping> escapeCharactersMap;
// Used to store a result of a `ParseSign()` function.
enum ParsedSign
{
// NOTE(review): meanings below are taken from the value names and from
// the sign-parsing method's documentation; its body is not visible here.
// No sign was present in the input.
SIGN_Missing,
// "+" sign.
SIGN_Plus,
// "-" sign.
SIGN_Minus
};
/**
* Initializes `Parser` with new data from a raw data
* (sequence of Unicode code points). Never fails.
*
* Any data from before this call is lost, any checkpoints are invalidated.
*
* @param source Sequence of Unicode code points that represents
* a string `Parser` will need to parse.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser InitializeRaw(array<Text.Character> source)
{
    local ParserState freshState;
    // New version number invalidates every `ParserState` that was obtained
    // from this `Parser` before this call.
    version += 1;
    content = source;
    // Build a clean, non-failed state pointing at the very first character
    freshState.ownerObject  = self;
    freshState.ownerVersion = version;
    freshState.failed       = false;
    freshState.pointer      = 0;
    // Both working and confirmed states start from the same clean state
    currentState    = freshState;
    confirmedState  = freshState;
    return self;
}
/**
* Initializes `Parser` with new data from a `string`. Never fails.
*
* Any data from before this call is lost, any checkpoints are invalidated.
*
* @param source String `Parser` will need to parse.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser Initialize
(
    string                      source,
    optional Text.StringType    sourceType
)
{
    local array<Text.Character> rawSource;
    // Convert `string` (interpreted according to `sourceType`) into
    // raw character data and delegate actual initialization.
    rawSource = _().text.StringToRaw(source, sourceType);
    return InitializeRaw(rawSource);
}
/**
* Initializes `Parser` with new data from a `Text`.
*
* Can fail if passed `none` as a parameter.
*
* Any data from before this call is lost, any checkpoints are invalidated.
*
* @param source `Text` object `Parser` will need to parse.
* If `none` is passed - parser won't be initialized.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser InitializeT(Text source)
{
    // `none` leaves caller `Parser` exactly as it was
    if (source != none)
    {
        InitializeRaw(source.ToRaw());
    }
    return self;
}
/**
* Checks if `Parser` is in a failed state.
*
* Parser enters a failed state whenever any parsing call returns without
* completing it's job. `Parser` in a failed state will automatically fail
* any further parsing attempts until it gets reset via `R()` call.
*
* @return Returns 'false' if `Parser()` is in a failed state and
* `true` otherwise.
*/
public final function bool Ok()
{
    // "Ok" simply means that the `failed` flag of the current state is unset
    if (currentState.failed)
    {
        return false;
    }
    return true;
}
/**
* Returns copy of the current state of this parser.
*
* As long as caller `Parser` was not reinitialized, returned `ParserState`
* structure can be used to revert this `Parser` to it's current condition
* by a `RestoreState()` call.
*
* @see `RestoreState()`
* @return Copy of the current state of the caller `Parser`.
*/
public final function ParserState GetCurrentState()
{
    local ParserState stateCopy;
    // Structs are copied by value, so caller receives an independent snapshot
    stateCopy = currentState;
    return stateCopy;
}
/**
* Returns copy of (currently) last confirmed state of this parser.
*
* As long as caller `Parser` was not reinitialized, returned `ParserState`
* structure can be used to revert this `Parser` to it's current confirmed
* state by a `RestoreState()` call.
*
* @see `RestoreState()`, `Confirm()`, `R()`
* @return Copy of (currently) last confirmed state of this parser.
*/
public final function ParserState GetConfirmedState()
{
    local ParserState stateCopy;
    // Structs are copied by value, so caller receives an independent snapshot
    stateCopy = confirmedState;
    return stateCopy;
}
/**
* Checks if given `stateToCheck` is valid for the caller `Parser`, i.e.:
* 1. It is a state generated by either `GetCurrentState()` or
* `GetConfirmedState()` calls on the caller `Parser`.
* 2. Caller `Parser` was not reinitialized since a call
* that generated given `stateToCheck`.
*
* @param stateToCheck `ParserState` to check for validity for
* caller `Parser`.
* @return `true` if given `stateToCheck` is valid and `false` otherwise.
*/
public final function bool IsStateValid(ParserState stateToCheck)
{
    // State is valid only if it was produced by this very object and
    // this object was not reinitialized since then.
    return (    stateToCheck.ownerObject == self
            &&  stateToCheck.ownerVersion == version);
}
/**
* Checks if calling `RestoreState()` for passed state will return a `Parser`
* in an "Ok" state (not failed), i.e. state is valid and
* was generated when `Parser` was in a non-failed state.
*
* @param stateToCheck `ParserState` to check for corresponding to
* `Parser` being in a non-failed state.
* By definition must also be valid for the caller `Parser`.
* @return `true` if given `stateToCheck` is valid and describes
* a non-failed state of the `Parser`; `false` otherwise.
*/
public final function bool IsStateOk(ParserState stateToCheck)
{
    // Validity is checked first; `failed` flag only matters for valid states
    return (IsStateValid(stateToCheck) && !stateToCheck.failed);
}
/**
* Resets parser to a state, given by `stateToRestore` argument
* (so a state `Parser` was in at the moment given `stateToRestore`
* was obtained).
*
* If given `stateToRestore` is from a different `Parser` or
* the owner `Parser` was reinitialized after passed state was obtained, -
* function will simply put caller `Parser` into a failed state.
* Note that caller `Parser` being put in a failed state after this call
* doesn't mean that described issues are actually present:
* `stateToRestore` can also describe a failed state of the `Parser`.
*
* @param stateToRestore `ParserState` that this method will attempt
* to set for the caller `Parser`.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser RestoreState(ParserState stateToRestore)
{
    if (IsStateValid(stateToRestore))
    {
        // Restores both parsing position and `failed` flag
        currentState = stateToRestore;
    }
    else
    {
        // Foreign or outdated state: keep position, but report a failure
        currentState.failed = true;
    }
    return self;
}
/**
* Remembers current state of `Parser` in an internal checkpoint variable,
* that can later be restored by an `R()` call.
*
* Can only save non-failed states and will only fail if caller `Parser` is
* in a failed state.
*
* `Confirm()` and `R()` are essentially convenience wrapper functions for
* `GetCurrentState()` and `RestoreState()` calls +
* state storage variable.
*
* @return `true` if current state is recorded in `Parser` as confirmed and
* `false` otherwise.
*/
public final function bool Confirm()
{
    // Failed states are never recorded as confirmed
    if (Ok())
    {
        confirmedState = currentState;
        return true;
    }
    return false;
}
/**
* Resets `Parser` to a last state recorded as confirmed by a last successful
* `Confirm()` function call. If there weren't any such call -
* reverts `Parser` to it's state right after initialization.
*
* Always resets failed state of a `Parser`. Cannot fail.
*
* `Confirm()` and `R()` are essentially convenience wrapper functions for
* `GetCurrentState()` and `RestoreState()` calls + state storage variable.
*
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser R()
{
    // Confirmed state is copied as a whole: parsing position and
    // `failed` flag are both replaced with the recorded values.
    currentState = confirmedState;
    return self;
}
/**
* Shifts parsing pointer forward.
*
* Can only shift forward. To revert to a previous state in case of failure use
* combination of `GetCurrentState()` and `RestoreState()` functions.
*
* @param shift How much to shift parsing pointer?
* Values of zero and below are discarded and `1` is used instead
* (i.e. by default this method shifts pointer by `1` position).
* @return Returns the calling object, to allow for function chaining.
*/
protected final function Parser ShiftPointer(optional int shift)
{
    local int newPointer;
    // Omitted, zero or negative `shift` means "shift by one"
    if (shift < 1)
    {
        shift = 1;
    }
    newPointer = currentState.pointer + shift;
    // Never move past the position right after the last character
    if (newPointer > content.length)
    {
        newPointer = content.length;
    }
    currentState.pointer = newPointer;
    return self;
}
/**
* Returns a code point from this `Parser`'s content, relative to next
* code point that caller `Parser` must handle.
*
* @param `shift` If `0` (default value) or negative value is passed -
* simply asks for the code point that caller `Parser` must handle.
* Otherwise shifts that index `shift` code points, i.e.
* `1` to return next code point or `2` to return code point after
* the next one.
* @return Returns character at a given shift. If the resulting position
* does not fit `Parser`'s contents, returns a character with
* code point `-1`.
* `GetCharacter()` with default (`0`) parameter can also return such
* character if contents of the caller `Parser` are empty or it has already
* consumed all input.
*/
protected final function Text.Character GetCharacter(optional int shift)
{
    local int               index;
    local Text.Character    outOfBounds;
    // Negative shifts are treated as zero
    index = currentState.pointer;
    if (shift > 0)
    {
        index += shift;
    }
    if (index >= 0 && index < content.length)
    {
        return content[index];
    }
    // Position is outside of `content`: report it with code point `-1`
    outOfBounds.codePoint = -1;
    return outOfBounds;
}
/**
* Forces caller `Parser` to enter a failed state.
*
* @return Returns the calling object, to allow for a quick exit from
* a parsing function by `return Fail();`.
*/
protected final function Parser Fail()
{
    // Only the flag is changed, parsing position is left where it was
    currentState.failed = true;
    return self;
}
/**
* Returns amount of code points that have already been parsed,
* provided that caller `Parser` is in a correct state.
*
* @return Returns how many Unicode code points have already been parsed if
* caller `Parser` is in correct state;
* otherwise return value is undefined.
*/
public final function int GetParsedLength()
{
    // Parsing position equals the amount of already consumed characters;
    // negative values are reported as zero.
    if (currentState.pointer < 0)
    {
        return 0;
    }
    return currentState.pointer;
}
/**
* Returns amount of code points that have not yet been parsed,
* provided that caller `Parser` is in a correct state.
*
* @return Returns how many Unicode code points are still unparsed if
* caller `Parser` is in correct state;
* otherwise return value is undefined.
*/
public final function int GetRemainingLength()
{
    local int remainingAmount;
    remainingAmount = content.length - currentState.pointer;
    // Never report negative amount
    if (remainingAmount < 0)
    {
        return 0;
    }
    return remainingAmount;
}
/**
* Checks if caller `Parser` has already parsed all of it's content.
* Uninitialized `Parser` has no content and, therefore, parsed it all.
*
* Should return `true` iff `GetRemainingLength() == 0`.
*
* @return `true` if caller `Parser` has no more data to parse.
*/
public final function bool HasFinished()
{
    // Finished as soon as parsing position no longer points inside `content`
    return !(currentState.pointer < content.length);
}
/**
* Returns still unparsed part of caller `Parser`'s source as an array of
* Unicode code points.
*
* @return Unparsed part of caller `Parser`'s source as an array of
* Unicode code points.
*/
public final function array<Text.Character> GetRemainderRaw()
{
    local int                   offset, remainingLength;
    local array<Text.Character> remainder;
    // Amount of unparsed characters does not change while we copy them
    remainingLength = GetRemainingLength();
    for (offset = 0; offset < remainingLength; offset += 1)
    {
        // `offset` always equals current length of `remainder`,
        // so this assignment appends to it.
        remainder[offset] = GetCharacter(offset);
    }
    return remainder;
}
/**
* Returns still unparsed part of caller `Parser`'s source as a `string`.
*
* @return Unparsed part of caller `Parser`'s source as a `string`.
*/
public final function string GetRemainder()
{
    // Collect unparsed characters and convert them into a plain `string`
    return _().text.RawToString(GetRemainderRaw(), STRING_Plain);
}
/**
* Returns still unparsed part of caller `Parser`'s source as `Text`.
*
* @return Unparsed part of caller `Parser`'s source as `Text`.
*/
public final function Text GetRemainderT()
{
    // Collect unparsed characters and wrap them into a `Text`
    return _().text.FromRaw(GetRemainderRaw());
}
/**
* Matches any sequence of whitespace symbols, without returning it.
* Starts from where previous parsing function finished.
*
* Can never cause parser to enter failed state.
*
* What symbols exactly are considered whitespace refer to the description of
* `TextAPI.IsWhitespace()` function.
*
* @param whitespacesAmount Returns how many whitespace symbols
* were skipped. Any given value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser Skip(optional out int whitespacesAmount)
{
    // Skips all whitespace characters located right at the current parsing
    // position and reports how many of them were skipped through
    // `whitespacesAmount`.
    // Does nothing (and leaves `whitespacesAmount` untouched) if caller
    // `Parser` is in a failed state. Never puts `Parser` into a failed state.
    local TextAPI api;
    if (!Ok()) return self;
    api = _().text;
    whitespacesAmount = 0;
    // Cycle will end once we either reach a non-whitespace symbol or
    // there are no more characters to get (`GetCharacter()` then returns
    // a character with code point `-1`)
    while (api.IsWhitespace(GetCharacter(whitespacesAmount)))
    {
        whitespacesAmount += 1;
    }
    // `ShiftPointer()` replaces non-positive shifts with `1`, so it must not
    // be called when there is nothing to skip: otherwise one non-whitespace
    // character would be silently consumed.
    if (whitespacesAmount > 0)
    {
        ShiftPointer(whitespacesAmount);
    }
    return self;
}
/**
* Function that tries to match given data in `Parser`'s content,
* starting from where previous parsing function finished.
*
* Does nothing if caller `Parser` was in failed state.
*
* @param data Data that must be matched to the `Parser`'s
* contents, starting from where previous parsing function finished.
* @param caseInsensitive If `false` the matching will have to be exact,
* using `true` will make this method to ignore the case,
* where it's applicable.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MatchRaw
(
    array<Text.Character>   data,
    optional bool           caseInsensitive
)
{
    // Tries to match `data` against `Parser`'s content, starting from
    // the current parsing position. On success parsing position is moved
    // right after the matched characters; on mismatch (or if there is
    // not enough content left) `Parser` enters a failed state.
    // Does nothing if caller `Parser` is already in a failed state.
    local int       i;
    local TextAPI   api;
    if (!Ok()) return self;
    // Empty `data` trivially matches and consumes nothing.
    // We must return before reaching `ShiftPointer()`, because it replaces
    // non-positive shifts with `1` and would consume one character.
    if (data.length <= 0) return self;
    if (data.length > GetRemainingLength()) return Fail();
    api = _().text;
    for (i = 0; i < data.length; i += 1)
    {
        if (!api.AreEqual(data[i], GetCharacter(i), caseInsensitive))
        {
            return Fail();
        }
    }
    ShiftPointer(data.length);
    return self;
}
/**
* Function that tries to match given `string`, starting from where
* previous parsing function finished.
*
* Does nothing if caller `Parser` was in failed state.
*
* @param word String that must be matched to the `Parser`'s
* contents, starting from where previous parsing function finished.
* @param caseInsensitive If `false` the matching will have to be exact,
* using `true` will make this method to ignore the case,
* where it's applicable.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser Match(string word, optional bool caseInsensitive)
{
    local array<Text.Character> rawWord;
    // Convert `string` into raw characters and let `MatchRaw()` do the job
    rawWord = _().text.StringToRaw(word);
    return MatchRaw(rawWord, caseInsensitive);
}
/**
* Function that tries to match given `Text`, starting from where
* previous parsing function finished.
*
* Does nothing if caller `Parser` was in failed state.
*
* @param word Text that must be matched to the `Parser`'s
* contents, starting from where previous parsing function finished.
* @param caseInsensitive If `false` the matching will have to be exact,
* using `true` will make this method to ignore the case,
* where it's applicable.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MatchT(Text word, optional bool caseInsensitive)
{
    if (!Ok()) return self;
    if (word != none)
    {
        return MatchRaw(word.ToRaw(), caseInsensitive);
    }
    // `none` cannot be matched against anything
    return Fail();
}
/**
* Internal function for parsing unsigned integers in any base from 2 to 36.
*
* This parsing can fail, putting `Parser` into a failed state.
*
* @param result If parsing is successful, this value will contain
* parsed integer, otherwise value is undefined.
* Any passed value is discarded.
* @param base Base, in which integer in question is recorded.
* @param numberLength If this parameter is less or equal to zero,
* function will stop parsing the moment it can't recognize a character as
* belonging to a number in a given base.
* It will only fail if it couldn't parse a single character;
* If this parameter is set to be positive (`> 0`), function will
* attempt to use exactly `numberLength` character for parsing and will
* fail if they would not constitute a valid number.
* @param consumedCodePoints Amount of code point used (consumed) to parse
* this number; undefined, if parsing is unsuccessful.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MUnsignedInteger
(
    out int             result,
    optional int        base,
    optional int        numberLength,
    optional out int    consumedCodePoints
)
{
    // Parses an unsigned integer recorded in a given `base`
    // (from 2 to 36; omitted / zero `base` means base 10).
    //  * `numberLength <= 0`: consume digits for as long as they are valid
    //      in `base`, fail only if not a single digit was read;
    //  * `numberLength > 0`: exactly `numberLength` digits must be read,
    //      otherwise parsing fails.
    // `consumedCodePoints` receives the amount of digits actually consumed.
    // Does nothing if caller `Parser` is already in a failed state.
    local bool      parsingFixedLength;
    local int       nextDigit;
    local TextAPI   api;
    // `Parser` in a failed state must fail any further parsing attempts
    // without consuming input, same as every other parsing method.
    if (!Ok()) return self;
    numberLength = Max(0, numberLength);
    parsingFixedLength = (numberLength != 0);
    if (base == 0)
    {
        base = 10;
    }
    else if (base < 2 || base > 36)
    {
        return Fail();
    }
    api = _().text;
    result = 0;
    consumedCodePoints = 0;
    while (!HasFinished())
    {
        if (parsingFixedLength && consumedCodePoints >= numberLength) break;
        // Negative value means that character is not a digit in `base`
        nextDigit = api.CharacterToInt(GetCharacter(), base);
        if (nextDigit < 0) break;
        result = result * base + nextDigit;
        consumedCodePoints += 1;
        ShiftPointer();
    }
    // Either we did not get the exact amount of digits that was requested or
    // we did not read any digits at all
    if (    (parsingFixedLength && consumedCodePoints != numberLength)
        ||  consumedCodePoints < 1)
    {
        return Fail();
    }
    return self;
}
/**
* Parses escaped sequence of the type that is usually used in
* string literals: backslash "\", followed by any character
* (called escaped character later) or, in special cases, several characters.
* For most characters escaped sequence resolved into
* an escaped character's code point.
*
* Several escaped symbols:
* \n, \r, \t, \b, \f, \v
* are translated into a different code point corresponding to
* a control symbols, normally denoted by these sequences.
*
* A Unicode code point can also be directly entered with either of the two
* commands:
* \U0056
* \u56
* The difference is that `\U` allows you to enter two-byte code point, while
* `\u` only allows to define code points that fit into 1 byte,
* but is more compact.
*
* @param denotedCharacter If parsing is successful, parameter will contain
* appropriate code point, denoted by a parsed escaped sequence;
* If parsing is unsuccessful, value is undefined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MEscapedSequence
(
    out Text.Character denotedCharacter
)
{
    local int mappingIndex;
    local int escapedCodePoint;
    if (!Ok()) return self;
    // Escaped sequence is a backslash plus at least one more character
    if (GetRemainingLength() < 2) return Fail();
    if (GetCharacter().codePoint != CODEPOINT_BACKSLASH) return Fail();
    // By default escaped character denotes itself
    denotedCharacter = GetCharacter(1);
    ShiftPointer(2);
    escapedCodePoint = denotedCharacter.codePoint;
    // Check whether escaped character stands for some special code point
    for (   mappingIndex = 0;
            mappingIndex < escapeCharactersMap.length;
            mappingIndex += 1)
    {
        if (escapeCharactersMap[mappingIndex].from == escapedCodePoint)
        {
            denotedCharacter.codePoint = escapeCharactersMap[mappingIndex].to;
            return self;
        }
    }
    // Check whether escaped character starts a direct declaration of
    // a code point with either 4 or 2 hexadecimal digits
    if (escapedCodePoint == CODEPOINT_ULARGE)
    {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 4);
    }
    else if (escapedCodePoint == CODEPOINT_USMALL)
    {
        MUnsignedInteger(denotedCharacter.codePoint, 16, 2);
    }
    return self;
}
/**
* Attempts to parse a string literal: a string enclosed in either of
* the following quotation marks: ", ', `.
* String literals can contain escaped sequences.
* String literals MUST end with closing quotation mark.
* @see `MEscapedSequence()`
*
* @param result If parsing is successful, this array will contain the
* contents of string literal with resolved escaped sequences;
* if parsing has failed, it's value is undefined.
* Any passed contents are simply discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringLiteralRaw(out array<Text.Character> result)
{
    local TextAPI           api;
    local Text.Character    openingQuote;
    local Text.Character    currentCharacter;
    local Text.Character    unescapedCharacter;
    if (!Ok()) return self;
    api = _().text;
    // Literal must start with a quotation mark, remember which one
    openingQuote = GetCharacter();
    if (!api.IsQuotationMark(openingQuote)) return Fail();
    ShiftPointer();
    result.length = 0;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        // Same quotation mark that opened the literal closes it
        if (api.AreEqual(currentCharacter, openingQuote))
        {
            ShiftPointer();
            return self;
        }
        // Regular character: copy as-is
        if (!api.IsCodePoint(currentCharacter, CODEPOINT_BACKSLASH))
        {
            result[result.length] = currentCharacter;
            ShiftPointer();
            continue;
        }
        // Backslash MUST start a valid escaped sequence
        if (!MEscapedSequence(unescapedCharacter).Ok())
        {
            return Fail();
        }
        result[result.length] = unescapedCharacter;
    }
    // We have run out of content before finding a closing quotation mark
    return Fail();
}
/**
* Attempts to parse a string literal: a string enclosed in either of
* the following quotation marks: ", ', `.
* String literals can contain escaped sequences.
* String literals MUST end with closing quotation mark.
* @see `MEscapedSequence()`
*
* @param result If parsing is successful, this `string` will contain the
* contents of string literal with resolved escaped sequences;
* if parsing has failed, it's value is undefined.
* Any passed contents are simply discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringLiteral(out string result)
{
    local array<Text.Character> rawLiteral;
    if (!Ok()) return self;
    MStringLiteralRaw(rawLiteral);
    // `result` is only overwritten after a successful parsing
    if (!Ok()) return self;
    result = _().text.RawToString(rawLiteral, STRING_Plain);
    return self;
}
/**
* Attempts to parse a string literal: a string enclosed in either of
* the following quotation marks: ", ', `.
* String literals can contain escaped sequences.
* String literals MUST end with closing quotation mark.
* @see `MEscapedSequence()`
*
* @param result If parsing is successful, this `Text` will contain the
* contents of string literal with resolved escaped sequences;
* if parsing has failed, it's value is undefined.
* Any passed contents are simply discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringLiteralT(out Text result)
{
    local array<Text.Character> rawLiteral;
    if (!Ok()) return self;
    MStringLiteralRaw(rawLiteral);
    // `result` is only overwritten after a successful parsing
    if (!Ok()) return self;
    result = _().text.FromRaw(rawLiteral);
    return self;
}
/**
* Matches everything until it finds one of the breaking symbols:
* 1. a specified code point (by default `0`);
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
* This method cannot fail.
*
* @param result Any content before one of the break symbols
* will be recorded into this array as a sequence of Unicode code points.
* @param characterBreak Method will stop parsing upon encountering this
* code point (it will not be included in the `result`)
* @param whitespacesBreak `true` if you want to also treat any
* whitespace character as a break symbol
* (@see `TextAPI.IsWhitespace()` for what symbols are
* considered whitespaces)
* @param quotesBreak `true` if you want to also treat any
* quotation mark character as a break symbol
* (@see `TextAPI.IsQuotation()` for what symbols are
* considered quotation marks).
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MUntilRaw
(
    out array<Text.Character>   result,
    optional Text.Character     characterBreak,
    optional bool               whitespacesBreak,
    optional bool               quotesBreak
)
{
    local TextAPI           api;
    local Text.Character    currentCharacter;
    if (!Ok()) return self;
    api = _().text;
    result.length = 0;
    while (!HasFinished())
    {
        currentCharacter = GetCharacter();
        // Stop (without consuming the character) on any break symbol
        if (    api.AreEqual(currentCharacter, characterBreak)
            ||  (whitespacesBreak && api.IsWhitespace(currentCharacter))
            ||  (quotesBreak && api.IsQuotationMark(currentCharacter)))
        {
            break;
        }
        result[result.length] = currentCharacter;
        ShiftPointer();
    }
    return self;
}
/**
* Matches everything until it finds one of the breaking symbols:
* 1. a specified code point (by default `0`);
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
* This method cannot fail.
*
* @param result Any content before one of the break symbols
* will be recorded into this `string`.
* @param characterBreak Method will stop parsing upon encountering this
* code point (it will not be included in the `result`)
* @param whitespacesBreak `true` if you want to also treat any
* whitespace character as a break symbol
* (@see `TextAPI.IsWhitespace()` for what symbols are
* considered whitespaces)
* @param quotesBreak `true` if you want to also treat any
* quotation mark character as a break symbol
* (@see `TextAPI.IsQuotation()` for what symbols are
* considered quotation marks).
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MUntil
(
    out string              result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak
)
{
    local array<Text.Character> rawContents;
    // Failed `Parser` neither parses anything nor touches `result`
    if (Ok())
    {
        MUntilRaw(rawContents, characterBreak, whitespacesBreak, quotesBreak);
        result = _().text.RawToString(rawContents, STRING_Plain);
    }
    return self;
}
/**
* Matches everything until it finds one of the breaking symbols:
* 1. a specified code point (by default `0`);
* 2. (optionally) whitespace symbol (@see `TextAPI.IsWhitespace()`);
* 3. (optionally) quotation symbol (@see `TextAPI.IsQuotation()`).
* This method cannot fail.
*
* @param result Any content before one of the break symbols
* will be recorded into this `Text`.
* @param characterBreak Method will stop parsing upon encountering this
* code point (it will not be included in the `result`)
* @param whitespacesBreak `true` if you want to also treat any
* whitespace character as a break symbol
* (@see `TextAPI.IsWhitespace()` for what symbols are
* considered whitespaces)
* @param quotesBreak `true` if you want to also treat any
* quotation mark character as a break symbol
* (@see `TextAPI.IsQuotation()` for what symbols are
* considered quotation marks).
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MUntilT
(
    out Text                result,
    optional Text.Character characterBreak,
    optional bool           whitespacesBreak,
    optional bool           quotesBreak
)
{
    local array<Text.Character> rawContents;
    // Failed `Parser` neither parses anything nor touches `result`
    if (Ok())
    {
        MUntilRaw(rawContents, characterBreak, whitespacesBreak, quotesBreak);
        result = _().text.FromRaw(rawContents);
    }
    return self;
}
/**
* Parses a string as either "simple" or "quoted".
* Not being able to read any symbols is not considered a failure.
*
* Reading empty string (either to lack of further data or
* instantly encountering a break symbol) is not considered a failure.
*
* Quoted string starts with quotation mark and ends either
* at the corresponding closing (un-escaped) mark
* or when `Parser`'s input has been fully consumed.
* If string started with a quotation mark, this method will act exactly
* like `MStringLiteralRaw()`.
*
* @param result If parsing is successful - string's contents will be
* recorded here; if parsing has failed - value is undefined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringRaw(out array<Text.Character> result)
{
    if (!Ok()) return self;
    // "Simple" string: everything up to a whitespace or a quotation mark
    if (!_().text.IsQuotationMark(GetCharacter()))
    {
        return MUntilRaw(result,, true, true);
    }
    // "Quoted" string: handled exactly like a string literal
    return MStringLiteralRaw(result);
}
/**
* Parses a string as either "simple" or "quoted".
* Not being able to read any symbols is not considered a failure.
*
* Reading empty string (either to lack of further data or
* instantly encountering a break symbol) is not considered a failure.
*
* Quoted string starts with quotation mark and ends either
* at the corresponding closing (un-escaped) mark
* or when `Parser`'s input has been fully consumed.
* If string started with a quotation mark, this method will act exactly
* like `MStringLiteral()`.
*
* @param result If parsing is successful - string's contents will be
* recorded here; if parsing has failed - value is undefined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MString(out string result)
{
    local array<Text.Character> rawContents;
    // Failed `Parser` neither parses anything nor touches `result`
    if (Ok())
    {
        MStringRaw(rawContents);
        // Conversion is done regardless of whether `MStringRaw()` succeeded
        result = _().text.RawToString(rawContents, STRING_Plain);
    }
    return self;
}
/**
* Parses a string as either "simple" or "quoted".
* Not being able to read any symbols is not considered a failure.
*
* Reading empty string (either to lack of further data or
* instantly encountering a break symbol) is not considered a failure.
*
* Quoted string starts with quotation mark and ends either
* at the corresponding closing (un-escaped) mark
* or when `Parser`'s input has been fully consumed.
* If string started with a quotation mark, this method will act exactly
* like `MStringLiteralT()`.
*
* @param result If parsing is successful - string's contents will be
* recorded here; if parsing has failed - value is undefined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MStringT(out Text result)
{
    local array<Text.Character> rawContents;
    // Failed `Parser` neither parses anything nor touches `result`
    if (Ok())
    {
        MStringRaw(rawContents);
        // Conversion is done regardless of whether `MStringRaw()` succeeded
        result = _().text.FromRaw(rawContents);
    }
    return self;
}
/**
* Matches a (possibly empty) sequence of whitespace symbols.
*
* Cannot fail (not being able to read any input is not considered a failure).
*
* @param result If parsing was successful - whitespaces' Unicode code points
* will be recorded in this array, otherwise - undefined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MWhitespacesRaw(out array<Text.Character> result)
{
    local TextAPI api;
    if (!Ok()) return self;
    api = _().text;
    result.length = 0;
    // Consume characters one-by-one for as long as they are whitespaces
    while (!HasFinished() && api.IsWhitespace(GetCharacter()))
    {
        result[result.length] = GetCharacter();
        ShiftPointer();
    }
    return self;
}
/**
* Matches a (possibly empty) sequence of whitespace symbols.
*
* Cannot fail (not being able to read any input is not considered a failure).
*
* @param result If parsing was successful - whitespaces will be
* recorded here, otherwise - undefined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MWhitespaces(out string result)
{
    local array<Text.Character> rawWhitespaces;
    // Failed `Parser` neither parses anything nor touches `result`
    if (Ok())
    {
        MWhitespacesRaw(rawWhitespaces);
        result = _().text.RawToString(rawWhitespaces, STRING_Plain);
    }
    return self;
}
/**
* Matches a (possibly empty) sequence of whitespace symbols.
*
* Cannot fail (not being able to read any input is not considered a failure).
*
* @param result If parsing was successful - whitespaces will be
* recorded here, otherwise - undefined.
* Any passed value is discarded.
* @return Returns the calling object, to allow for function chaining.
*/
public final function Parser MWhitespacesT(out Text result)
{
local array<Text.Character> rawResult;
if (!Ok()) return self;
MWhitespacesRaw(rawResult);
result = _().text.FromRaw(rawResult);
return self;
}
/**
 * Parses next code point as itself.
 *
 * Can only fail if caller `Parser` has already exhausted all available data.
 *
 * @param result If parsing was successful - next Unicode code point,
 *     otherwise - value is undefined.
 *     Any passed value is discarded.
 * @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MCharacter(out Text.Character result)
{
    if (!Ok())
    {
        return self;
    }
    // Running out of input is the only failure condition of this method
    if (HasFinished())
    {
        return Fail();
    }
    result = GetCharacter();
    ShiftPointer();
    return self;
}
/**
 * Parses next code point as a byte.
 *
 * Can fail if caller `Parser` has already exhausted all available data or
 * next Unicode code point cannot fit into the `byte` value range
 * (from `0` to `BYTE_MAX` inclusive).
 *
 * @param result If parsing was successful - next Unicode code point as
 *     a byte, otherwise - value is undefined.
 *     Any passed value is discarded.
 * @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MByte(out byte result)
{
    local Text.Character    parsedCharacter;
    local bool              fitsIntoByte;

    if (!Ok())
    {
        return self;
    }
    if (!MCharacter(parsedCharacter).Ok())
    {
        return Fail();
    }
    // Code point was consumed; now make sure it can be stored in a `byte`
    fitsIntoByte =      parsedCharacter.codePoint >= 0
                    &&  parsedCharacter.codePoint <= BYTE_MAX;
    if (!fitsIntoByte)
    {
        return Fail();
    }
    result = parsedCharacter.codePoint;
    return self;
}
/**
 * Tries to parse a sign: either "+" or "-".
 *
 * @param result Value of `ParsedSign` will be recorded here,
 *     depending on what sign was encountered.
 *     `SIGN_Missing` value is only possible if we allow sign to be missing.
 * @param allowMissingSign By default (`false`) parsing will fail
 *     if next character is neither "+" nor "-";
 *     `true` means that parsing will not fail even if there is no sign -
 *     method will then consume no input and will record `SIGN_Missing`
 *     as a result.
 * @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MSign
(
    out ParsedSign  result,
    optional bool   allowMissingSign
)
{
    local ParserState beforeSign;

    if (!Ok())
    {
        return self;
    }
    beforeSign = GetCurrentState();
    if (Match("-").Ok())
    {
        result = SIGN_Minus;
        return self;
    }
    // "-" did not match: rewind and give "+" a try
    if (RestoreState(beforeSign).Match("+").Ok())
    {
        result = SIGN_Plus;
        return self;
    }
    // Neither sign was found. Unless caller allowed that, we leave parser
    // as it is after the unsuccessful `Match()` attempt above.
    if (allowMissingSign)
    {
        RestoreState(beforeSign);
        result = SIGN_Missing;
    }
    return self;
}
/**
 * Tries to parse a number prefix that determines a base system for denoting
 * integer numbers:
 *     1. `0x` means hexadecimal;
 *     2. `0b` means binary;
 *     3. `0o` means octal;
 *     4. otherwise we use decimal system.
 *
 * This parsing method cannot fail.
 *
 * Parser consumes appropriate prefix; nothing if decimal system is determined.
 *
 * @param base Determined base (16, 2, 8 or 10) will be recorded here.
 *     Any passed value is discarded.
 * @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MBase(out int base)
{
    local ParserState beforePrefix;

    if (!Ok())
    {
        return self;
    }
    beforePrefix = GetCurrentState();
    if (Match("0x").Ok())
    {
        base = 16;
        return self;
    }
    if (RestoreState(beforePrefix).Match("0b").Ok())
    {
        base = 2;
        return self;
    }
    if (RestoreState(beforePrefix).Match("0o").Ok())
    {
        base = 8;
        return self;
    }
    // No known prefix: undo the last failed match and fall back to decimal
    RestoreState(beforePrefix);
    base = 10;
    return self;
}
/**
 * Parses signed integer either in a directly given base (`base`) or in an
 * auto-determined one (based on prefix, @see `MBase()`).
 *
 * Integers are expected in form: (+/-)(0x/0b/0o)<sequence of digits>.
 * Examples: 78, 0o34, -2, 0b0101001, -0x78aC.
 *
 * @param result If parsing is successful - parsed value will be
 *     recorded here; if parsing fails - value is undetermined.
 *     Any passed value is discarded.
 * @param base Base in which function must attempt to parse a number;
 *     Default value (`0`) means function must auto-determine base,
 *     based on the prefix, otherwise must be between 2 and 36.
 * @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MInteger(out int result, optional int base)
{
    local ParsedSign numberSign;

    if (!Ok())
    {
        return self;
    }
    // Sign is optional and always comes before the base prefix
    MSign(numberSign, true);
    // Prefix is only looked for when caller did not force a particular base
    if (base == 0)
    {
        MBase(base);
    }
    MUnsignedInteger(result, base);
    if (numberSign == SIGN_Minus)
    {
        result = -result;
    }
    return self;
}
// Internal function for parsing fractional part (including the dot ".")
// of the text representation for floating point number (decimal system only).
// Cannot fail, records `0.0` into `result` if it couldn't parse anything:
// either there was no dot (nothing is consumed then) or the dot was not
// followed by a valid unsigned integer (only the dot is consumed then).
protected final function Parser MFractionalPart(out float result)
{
    local ParserState   checkpoint;
    local int           fractionalInt;
    local int           digitsRead;

    if (!Ok()) return self;
    result = 0.0;
    checkpoint = GetCurrentState();
    if (!Match(".").Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    // From now on the dot stays consumed, even if no digits follow it
    checkpoint = GetCurrentState();
    if (!MUnsignedInteger(fractionalInt,,, digitsRead).Ok())
    {
        // `result` already holds `0.0`, we only need to undo the failure
        RestoreState(checkpoint);
        return self;
    }
    // Divide by a power of ten rather than multiply by a power of `0.1`:
    // `0.1` has no exact binary representation, so its powers accumulate
    // an extra rounding error that a single division avoids.
    // NOTE(review): relies on `digitsRead` being the amount of digits
    // consumed by `MUnsignedInteger()`, leading zeroes included - confirm.
    result = float(fractionalInt) / (10.0 ** digitsRead);
    return self;
}
// Internal function for parsing exponent part (including the symbol "e")
// of the text representation for floating point number (decimal system only).
// Can only fail if symbol "e" / "E" is present, but there is no valid
// integer right after it (whitespace symbols in-between are forbidden).
// Records `0` into `result` if there was no exponent to parse
// (nothing is consumed in that case).
protected final function Parser MExponentPart(out int result)
{
    local ParserState   checkpoint;
    local ParsedSign    exponentSign;

    if (!Ok()) return self;
    // `result` is an `out` parameter, so it must be reset before the early
    // return below, otherwise a missing exponent would leave caller's old
    // value in place instead of the promised `0`.
    result = 0;
    // Is there even an exponential part?
    checkpoint = GetCurrentState();
    if (!Match("e", true).Ok())
    {
        RestoreState(checkpoint);
        return self;
    }
    // If yes - parse it: optional sign followed by a decimal integer
    MSign(exponentSign, true).MUnsignedInteger(result, 10);
    if (exponentSign == SIGN_Minus)
    {
        result *= -1;
    }
    return self;
}
// Internal function for parsing optional suffix ("f" or "F") of the text
// representation for floating point number.
// Cannot fail. Consumes at most one Unicode code point and only
// when it is either "f" or "F".
protected final function Parser MFloatSuffix()
{
    local ParserState beforeSuffix;

    if (Ok())
    {
        beforeSuffix = GetCurrentState();
        if (!Match("f", true).Ok())
        {
            // Suffix is optional, so a failed match is simply undone
            RestoreState(beforeSuffix);
        }
    }
    return self;
}
/**
 * Parses signed floating point number in JSON form + optional "f" / "F"
 * suffix at the end.
 *
 * Number is read as a sequence of: optional sign, unsigned decimal integer,
 * optional fractional part, optional exponent part and optional suffix.
 *
 * @param result If parsing is successful - parsed value will be
 *     recorded here; if parsing fails - value is undetermined.
 *     Any passed value is discarded.
 * @return Returns the calling object, to allow for function chaining.
 */
public final function Parser MNumber(out float result)
{
    local ParsedSign    numberSign;
    local int           wholePart, exponent;
    local float         fraction;

    if (!Ok())
    {
        return self;
    }
    // Every step below is a no-op once parser has failed, so it is enough
    // to check for the failure once, after all of them were attempted
    MSign(numberSign, true);
    MUnsignedInteger(wholePart, 10);
    MFractionalPart(fraction);
    MExponentPart(exponent);
    MFloatSuffix();
    if (!Ok())
    {
        return self;
    }
    // Assemble the value: mantissa first, then scale, then sign
    result = float(wholePart) + fraction;
    result *= 10.0 ** exponent;
    if (numberSign == SIGN_Minus)
    {
        result = -result;
    }
    return self;
}
defaultproperties
{
// Start with no initializations done
version = 0
// Upper bound (inclusive) that `MByte()` accepts for a parsed code point
BYTE_MAX = 255
// Code points of symbols this class needs to refer to by value
CODEPOINT_BACKSLASH = 92 // \ (backslash)
CODEPOINT_USMALL = 117 // u
CODEPOINT_ULARGE = 85 // U
// Escape sequences table: `from` is the code point of the letter that
// follows the backslash, `to` is the code point of the control character
// it denotes.
// NOTE(review): `escapeCharactersMap` is declared and used outside of this
// part of the file - presumably by escaped string parsing; confirm.
escapeCharactersMap(0)=(from=110,to=10) // \n
escapeCharactersMap(1)=(from=114,to=13) // \r
escapeCharactersMap(2)=(from=116,to=9) // \t
escapeCharactersMap(3)=(from=98,to=8) // \b
escapeCharactersMap(4)=(from=102,to=12) // \f
escapeCharactersMap(5)=(from=118,to=11) // \v
}