AcediaCore/sources/Text/TextAPI.uc

/**
 *      API that provides functions for working with characters and for creating
 *  `Text` and `Parser` instances.
 *      Copyright 2020 - 2021 Anton Tarasenko
 *------------------------------------------------------------------------------
 * This file is part of Acedia.
 *
 * Acedia is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License, or
 * (at your option) any later version.
 *
 * Acedia is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Acedia.  If not, see <https://www.gnu.org/licenses/>.
 */
class TextAPI extends AcediaObject
    dependson(Text);

/**
 *  Produces a `Formatting` structure in its default state: an "empty
 *  formatting" that says nothing about how the text should be formatted.
 *
 *  Cannot fail.
 *
 *  @return Empty (default-valued) formatting structure.
 */
public final function Text.Formatting EmptyFormatting()
{
    //  Never modified: returned exactly as it was declared
    local Text.Formatting blankFormatting;
    return blankFormatting;
}

/**
 *  Builds a `Formatting` structure that describes text of the given color.
 *
 *  Cannot fail.
 *
 *  @param  color   Color to store inside the returned formatting.
 *  @return Formatting structure marked as colored and carrying `color`.
 */
public final function Text.Formatting FormattingFromColor(Color color)
{
    local Text.Formatting result;
    result.color        = color;
    //  Without this flag the stored color would be ignored
    result.isColored    = true;
    return result;
}

/**
 *  Compares two `Text.Formatting` structures.
 *
 *  Formattings are considered equal when either both of them are colorless
 *  or both of them are colored and their colors match
 *  (colors are compared with `_.color.AreEqualWithAlpha()`).
 *
 *  @param  formatting1 First formatting to compare.
 *  @param  formatting2 Second formatting to compare.
 *  @return `true` if formattings are equal and `false` otherwise.
 */
public final function bool IsFormattingEqual(
    Text.Formatting formatting1,
    Text.Formatting formatting2)
{
    //  Colors only matter when both formattings actually use them
    if (formatting1.isColored && formatting2.isColored)
    {
        return _.color.AreEqualWithAlpha(   formatting1.color,
                                            formatting2.color);
    }
    //  At least one formatting is colorless here:
    //  they are equal only if the other one is colorless as well
    return (formatting1.isColored == formatting2.isColored);
}

/**
 *  Checks if given character is lower case.
 *
 *      Result of this method describes whether character is
 *  precisely "lower case", instead of just "not being upper or title case".
 *      That is, this method will return `false` for characters that aren't
 *  considered either lowercase or uppercase (like "#", "@" or "&").
 *
 *  Only small Latin and small Cyrillic (Russian) letters are recognized.
 *
 *  @param  character   Character to test for lower case.
 *  @return `true` if given character is lower case.
 */
public final function bool IsLower(Text.Character character)
{
    local int code;
    code = character.codePoint;
    //  Small Latin letters
    if (code >= 97 && code <= 122)      return true;
    //  Small Cyrillic (Russian) letters
    if (code >= 1072 && code <= 1103)   return true;
    //  `ё` lies outside of the range above and is checked separately
    return (code == 1105);
}

/**
 *  Checks if given character is upper case.
 *
 *      Result of this method describes whether character is
 *  precisely "upper case", instead of just "not being lower or title case".
 *      That is, this method will return `false` for characters that aren't
 *  considered either lowercase or uppercase (like "#", "@" or "&").
 *
 *  Only capital Latin and capital Cyrillic (Russian) letters are recognized.
 *
 *  @param  character   Character to test for upper case.
 *  @return `true` if given character is upper case.
 */
public final function bool IsUpper(Text.Character character)
{
    local int code;
    code = character.codePoint;
    //  Capital Latin letters
    if (code >= 65 && code <= 90)       return true;
    //  Capital Cyrillic (Russian) letters
    if (code >= 1040 && code <= 1071)   return true;
    //  `Ё` lies outside of the range above and is checked separately
    return (code == 1025);
}

/**
 *  Checks if given character corresponds to a decimal digit (`0` - `9`).
 *
 *  @param  character   Character to check for being a digit.
 *  @return `true` if given character is a digit, `false` otherwise.
 */
public final function bool IsDigit(Text.Character character)
{
    //  Code points 48..57 are the characters `0`..`9`
    return (character.codePoint >= 48 && character.codePoint <= 57);
}

/**
 *  Checks if given character is an ASCII character, i.e. whether its
 *  code point lies in the `[0; 127]` range.
 *
 *  @param  character   Character to check for being an ASCII character.
 *  @return `true` if given character is an ASCII character,
 *      `false` otherwise.
 */
public final function bool IsASCII(Text.Character character)
{
    return (character.codePoint >= 0 && character.codePoint <= 127);
}

/**
 *  Checks if given character represents some kind of white space
 *  symbol (like space ~ 0x0020, tab ~ 0x0009, etc.),
 *  according to either Unicode ('Separator, Space' category) or
 *  a more classic space symbol definition, that includes:
 *      whitespace, tab, line feed, line tabulation, form feed, carriage return.
 *
 *  @param  character   Character to check for being a whitespace.
 *  @return `true` if given character is a whitespace, `false` otherwise.
 */
public final function bool IsWhitespace(Text.Character character)
{
    local int code;
    code = character.codePoint;
    //  Classic control whitespaces form a continuous range:
    //  tab, line feed, line tabulation, form feed, carriage return
    if (code >= 0x0009 && code <= 0x000D) {
        return true;
    }
    //  So do the Unicode spaces from "en quad" to "hair space":
    //  en/em quad, en/em space, three-/four-/six-per-em space,
    //  figure space, punctuation space, thin space, hair space
    if (code >= 0x2000 && code <= 0x200A) {
        return true;
    }
    //  Remaining stand-alone code points
    switch (code)
    {
    case 0x0020:    //  Whitespace
    case 0x00A0:    //  No-break space
    case 0x1680:    //  Ogham space mark
    case 0x202F:    //  Narrow no-break space
    case 0x205F:    //  Medium mathematical space
    case 0x3000:    //  Ideographic space
        return true;
    }
    return false;
}

/**
 *  Checks if passed character is one of the following quotation mark symbols:
 *      `"`, `'`, `\``.
 *
 *  @param  character   Character to check for being a quotation mark.
 *  @return `true` if given character is one of the recognized
 *      quote symbols, `false` otherwise.
 */
public final function bool IsQuotationMark(Text.Character character)
{
    switch (character.codePoint)
    {
    case 0x0022:    //  Quotation mark `"`
    case 0x0027:    //  Apostrophe `'`
    case 0x0060:    //  Grave accent `\``
        return true;
    }
    return false;
}

/**
 *  Extracts a character at position `position` from a given plain `string`.
 *
 *  For extracting multiple characters or a character from a colored/formatted
 *  `string` we advise converting `string` into `Text` instead.
 *
 *  @param  source      `string`, from which to extract the character.
 *  @param  position    Position of the character to extract, starts from `0`.
 *  @return Character at given position in the given source
 *      (with default formatting).
 *      If specified position is invalid (`< 0` or `>= Len(source)`),
 *      returns invalid character.
 */
public final function Text.Character GetCharacter(
    string          source,
    optional int    position)
{
    local Text.Character extracted;
    //  Reject positions outside of `source`
    if (position < 0 || position >= Len(source)) {
        return GetInvalidCharacter();
    }
    //  Cut out a single-character `string` and take its code
    extracted.codePoint = Asc(Mid(source, position, 1));
    return extracted;
}

/**
 *  Creates a `string` that consists only of a given character.
 *
 *  @param  character   Character that will be converted into a `string`.
 *  @return `string` that consists only of a given character,
 *      if given character is valid. Empty `string` otherwise.
 */
public final function string CharacterToString(Text.Character character)
{
    if (IsValidCharacter(character)) {
        return Chr(character.codePoint);
    }
    //  Invalid characters have no `string` representation
    return "";
}

/**
 *  Converts given character into a number it represents in some base
 *  (from 2 to 36), i.e.:
 *  1 -> 1
 *  7 -> 7
 *  a -> 10
 *  e -> 14
 *  z -> 35
 *
 *  Only ASCII digits (`0`-`9`) and ASCII Latin letters (`a`-`z`, `A`-`Z`)
 *  are recognized; any other character is rejected.
 *
 *  @param  character   Character to convert into integer.
 *      Case does not matter, i.e. "a" and "A" will be treated the same.
 *  @param  base        Base to use for conversion.
 *      Valid values are from `2` to `36` (inclusive);
 *      If invalid value was specified (such as default `0`),
 *      the base of `36` is assumed, since that would allow for all possible
 *      characters to be converted.
 *  @return Non-negative integer value that is denoted by
 *      given character in given base;
 *      `-1` if given character does not represent anything in the given base.
 */
public final function int CharacterToInt(
    Text.Character  character,
    optional int    base
)
{
    local int code, number;
    if (base < 2 || base > 36) {
        base = 36;
    }
    //      Letter case is handled by checking both ASCII letter ranges
    //  explicitly instead of lower-casing the character first:
    //  Unicode lower-casing maps some non-ASCII characters onto ASCII letters
    //  (e.g. U+212A "Kelvin sign" -> `k`), which would make them
    //  incorrectly accepted as digits.
    code = character.codePoint;
    //  digits
    if (code >= 0x0030 && code <= 0x0039) {
        number = code - 0x0030;
    }
    //  a-z
    else if (code >= 0x0061 && code <= 0x007a) {
        number = code - 0x0061 + 10;
    }
    //  A-Z
    else if (code >= 0x0041 && code <= 0x005a) {
        number = code - 0x0041 + 10;
    }
    else {
        return -1;
    }
    //  Digit exists, but is too large for the requested base
    if (number >= base) {
        return -1;
    }
    return number;
}

/**
 *  Checks if given `character` is represented by a given `codePoint` in
 *  Unicode standard.
 *
 *  Formatting of the `character` is not taken into account.
 *
 *  @param  character   Character to check.
 *  @param  codePoint   Code point to check.
 *  @return `true` if code point of the given character is equal to
 *      `codePoint` and `false` otherwise.
 */
public final function bool IsCodePoint(Text.Character character, int codePoint)
{
    if (character.codePoint == codePoint) {
        return true;
    }
    return false;
}

/**
 *  Extracts formatting of the given character.
 *
 *  @param  character   Character to get formatting of.
 *  @return Formatting of the given character.
 *      Always returns empty (not colored) formatting for invalid characters.
 */
public final function Text.Formatting GetCharacterFormatting(
    Text.Character character)
{
    //  Left untouched: serves as the result for invalid characters
    local Text.Formatting noFormatting;
    if (!IsValidCharacter(character)) {
        return noFormatting;
    }
    return character.formatting;
}

/**
 *  Changes formatting of a given character.
 *
 *  @param  character       Character to change formatting of.
 *  @param  newFormatting   New formatting to set.
 *  @return Same character as `character`, but with new formatting.
 *      Invalid characters are returned unaltered.
 */
public final function Text.Character SetFormatting(
    Text.Character  character,
    Text.Formatting newFormatting)
{
    //  `character` is our own copy of the argument, so it is modified in place
    if (IsValidCharacter(character)) {
        character.formatting = newFormatting;
    }
    return character;
}

/**
 *  Returns color of a given `Character`, given what color is considered
 *  default.
 *
 *  `Character`s can have their color set to "default" (not colored),
 *  meaning they would use whatever is considered default color in the context.
 *
 *  @param  character       `Character`, which color to return.
 *  @param  defaultColor    Color, considered default.
 *  @return Own color of a given `Character` if it is colored and
 *      `defaultColor` otherwise.
 */
public final function Color GetCharacterColor(
    Text.Character  character,
    optional Color  defaultColor)
{
    //  Not colored characters fall back to the default color
    if (!character.formatting.isColored) {
        return defaultColor;
    }
    return character.formatting.color;
}

/**
 *  Returns character that is considered invalid.
 *
 *  Returned value is not the only invalid character: there can be different
 *  invalid characters.
 *
 *  @return Invalid character: one with code point `-1` and
 *      default formatting.
 */
public final function Text.Character GetInvalidCharacter()
{
    local Text.Character invalidCharacter;
    //  Negative code point is what marks a character as invalid
    invalidCharacter.codePoint = -1;
    return invalidCharacter;
}

/**
 *  Checks if given character is valid.
 *
 *  Character is considered valid when its code point is not negative.
 *
 *  @param  character   Character to check.
 *  @return `true` if passed character is valid and `false` otherwise.
 */
public final function bool IsValidCharacter(Text.Character character)
{
    if (character.codePoint < 0) {
        return false;
    }
    return true;
}

/**
 *  Checks if given characters are equal, with or without accounting
 *  for their case and formatting.
 *
 *      This method supports comparison both sensitive and not sensitive to
 *  the case and difference in formatting (color of the characters).
 *      By default comparison is case-sensitive, but ignores
 *  formatting information.
 *
 *      Invalid characters are always considered equal to each other
 *  (precise value of their `codePoint` or `formatting` is irrelevant) and
 *  are never equal to a valid character.
 *
 *  @param  character1          Character to compare.
 *  @param  character2          Character to compare.
 *  @param  caseSensitivity     Defines whether comparison should be
 *      case-sensitive. By default it is.
 *  @param  formatSensitivity   Defines whether comparison should be
 *      sensitive for color information. By default it is not.
 *  @return `true` if given characters are considered equal,
 *      `false` otherwise.
 */
public final function bool AreEqual(
    Text.Character                  character1,
    Text.Character                  character2,
    optional Text.CaseSensitivity   caseSensitivity,
    optional Text.FormatSensitivity formatSensitivity
)
{
    local bool isValid1, isValid2;
    isValid1 = IsValidCharacter(character1);
    isValid2 = IsValidCharacter(character2);
    //  With at least one invalid character involved, characters are equal
    //  only when both of them are invalid
    if (!isValid1 || !isValid2) {
        return (isValid1 == isValid2);
    }
    //  Bring both characters to the same case if case must be ignored
    if (caseSensitivity == SCASE_INSENSITIVE)
    {
        character1 = ToLower(character1);
        character2 = ToLower(character2);
    }
    if (character1.codePoint != character2.codePoint) {
        return false;
    }
    //  Code points match, so only formatting (if requested) is left to check
    if (formatSensitivity != SFORM_SENSITIVE) {
        return true;
    }
    return IsFormattingEqual(character1.formatting, character2.formatting);
}

/**
 *  Converts given character into its lower case version, as provided by
 *  `UnicodeData.ToLowerCodePoint()`.
 *
 *  @param  character   Character to convert into lower case.
 *  @return Lower case version of the given character, formatting is preserved.
 *      If `UnicodeData.ToLowerCodePoint()` returns a negative value
 *      for the character's code point (expected for characters without
 *      lower case version, like "&" or "!"), -
 *      function returns given character unchanged.
 */
public final function Text.Character ToLower(Text.Character character)
{
    local int loweredCodePoint;
    loweredCodePoint =
        class'UnicodeData'.static.ToLowerCodePoint(character.codePoint);
    //  Negative result means there is nothing to replace code point with
    if (loweredCodePoint < 0) {
        return character;
    }
    character.codePoint = loweredCodePoint;
    return character;
}

/**
 *  Converts given character into its upper case version, as provided by
 *  `UnicodeData.ToUpperCodePoint()`.
 *
 *  @param  character   Character to convert into upper case.
 *  @return Upper case version of the given character, formatting is preserved.
 *      If `UnicodeData.ToUpperCodePoint()` returns a negative value
 *      for the character's code point (expected for characters without
 *      upper case version, like "&" or "!"), -
 *      function returns given character unchanged.
 */
public final function Text.Character ToUpper(Text.Character character)
{
    local int upperedCodePoint;
    upperedCodePoint =
        class'UnicodeData'.static.ToUpperCodePoint(character.codePoint);
    //  Negative result means there is nothing to replace code point with
    if (upperedCodePoint < 0) {
        return character;
    }
    character.codePoint = upperedCodePoint;
    return character;
}

/**
 *      Prepares an array of parts from a given single `Text`.
 *      First character is treated as a separator with which the rest of
 *  the given `Text` is split into parts:
 *      ~ "/ab/c/d" => ["ab", "c", "d"]
 *      ~ "zWordzomgzz" => ["Word", "omg", "", ""]
 *
 *  This method is useful to easily prepare array of words for `Parser`'s
 *  methods.
 *
 *  @param  source  `Text` that contains separator with parts to
 *      separate and extract.
 *  @return Separated words. Empty array if passed `source` was `none` or
 *      empty, otherwise contains at least one element.
 */
public final function array<MutableText> Parts(Text source)
{
    local array<MutableText> splitParts;
    //  Nothing to split
    if (source == none || source.GetLength() <= 0) {
        return splitParts;
    }
    splitParts = source.SplitByCharacter(source.GetCharacter(0));
    //      `source` starts with the separator, so `splitParts` is guaranteed
    //  to be non-empty and to begin with an (empty) substring that
    //  precedes that separator.
    //      That substring is not a part of the result: release and drop it.
    splitParts[0].FreeSelf();
    splitParts.Remove(0, 1);
    return splitParts;
}

/**
 *  Creates a new, empty `MutableText`.
 *
 *  This is a shortcut, same result can be achieved by
 *  `_.memory.Allocate(class'MutableText')`.
 *
 *  @return New instance of `MutableText` with empty contents.
 */
public final function MutableText Empty()
{
    local MutableText emptyText;
    emptyText = MutableText(_.memory.Allocate(class'MutableText'));
    return emptyText;
}

/**
 *  Creates a `Text` that will contain a given plain `string`.
 *
 *  To create `MutableText` instead use `FromStringM()` method.
 *
 *  @param  source  Plain `string` that will be copied into returned `Text`.
 *  @return New instance of `Text` that will contain passed plain `string`.
 */
public final function Text FromString(string source)
{
    local Text plainText;
    //  Actual work is delegated to the static constructor in `Text`
    plainText = class'Text'.static.ConstFromPlainString(source);
    return plainText;
}

/**
 *  Creates a `MutableText` that will contain a given plain `string`.
 *
 *  To create immutable `Text` instead use `FromString()` method.
 *
 *  @param  source  Plain `string` that will be copied into
 *      returned `MutableText`.
 *  @return New instance of `MutableText` that will contain passed
 *      plain `string`.
 */
public final function MutableText FromStringM(string source)
{
    //  Start with an empty `MutableText` and fill it with `source`
    return Empty().AppendPlainString(source);
}

/**
 *  Creates a `Text` that will contain a given colored `string`.
 *
 *  To create `MutableText` instead use `FromColoredStringM()` method.
 *
 *  @param  source  Colored `string` that will be copied into returned `Text`.
 *  @return New instance of `Text` that will contain passed colored `string`.
 */
public final function Text FromColoredString(string source)
{
    local Text coloredText;
    //  Actual work is delegated to the static constructor in `Text`
    coloredText = class'Text'.static.ConstFromColoredString(source);
    return coloredText;
}

/**
 *  Creates a `MutableText` that will contain a given colored `string`.
 *
 *  To create immutable `Text` instead use `FromColoredString()` method.
 *
 *  @param  source  Colored `string` that will be copied into
 *      returned `MutableText`.
 *  @return New instance of `MutableText` that will contain passed
 *      colored `string`.
 */
public final function MutableText FromColoredStringM(string source)
{
    //  Start with an empty `MutableText` and fill it with `source`
    return Empty().AppendColoredString(source);
}

/**
 *  Creates a `Text` that will contain a given formatted `string`.
 *
 *  To create `MutableText` instead use `FromFormattedStringM()` method.
 *
 *  @param  source  Formatted `string` that will be copied into returned `Text`.
 *  @return New instance of `Text` that will contain passed formatted `string`.
 */
public final function Text FromFormattedString(string source)
{
    local Text formattedText;
    //  Actual work is delegated to the static constructor in `Text`
    formattedText = class'Text'.static.ConstFromFormattedString(source);
    return formattedText;
}

/**
 *  Creates a `MutableText` that will contain a given formatted `string`.
 *
 *  To create immutable `Text` instead use `FromFormattedString()` method.
 *
 *  @param  source  Formatted `string` that will be copied into
 *      returned `MutableText`.
 *  @return New instance of `MutableText` that will contain passed
 *      formatted `string`.
 */
public final function MutableText FromFormattedStringM(string source)
{
    //  Start with an empty `MutableText` and fill it with `source`
    return Empty().AppendFormattedString(source);
}

/**
 *  Method for creating a new, uninitialized parser object.
 *
 *  This is a shortcut, same result can be achieved by
 *  `_.memory.Allocate(class'Parser')`.
 *
 *  @return New, uninitialized `Parser`.
 */
public final function Parser NewParser()
{
    local Parser result;
    result = Parser(_.memory.Allocate(class'Parser'));
    return result;
}

/**
 *  Method for creating a new parser, initialized with contents of given `Text`.
 *
 *  @param  source  Returned `Parser` will be setup to parse the contents of
 *      the passed `Text`.
 *      Value is passed to `Parser.Initialize()` as-is, including `none`
 *      (in which case parser is expected to remain uninitialized).
 *  @return Newly allocated `Parser`, on which `Initialize()` was called
 *      with `source`.
 */
public final function Parser Parse(Text source)
{
    local Parser result;
    //  Same allocation as `NewParser()` performs
    result = Parser(_.memory.Allocate(class'Parser'));
    result.Initialize(source);
    return result;
}

/**
 *  Method for creating a new parser, initialized with a given plain `string`.
 *
 *  @param  source  Returned `Parser` will be setup to parse this
 *      plain `string`.
 *  @return Newly allocated `Parser`, on which `InitializeS()` was called
 *      with `source` (treated as a plain `string`).
 */
public final function Parser ParseString(string source)
{
    local Parser result;
    //  Same allocation as `NewParser()` performs
    result = Parser(_.memory.Allocate(class'Parser'));
    result.InitializeS(source);
    return result;
}

//TODO: remove this
/**
 *  Placeholder hash method that is scheduled for removal (see TODO above).
 *
 *  @param  source  `string` to compute hash for. Currently ignored.
 *  @return Always `0`, regardless of the `source`.
 */
public final function int GetHash(string source)
{
    return 0;
}

defaultproperties
{
}