You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
676 lines
21 KiB
676 lines
21 KiB
/** |
|
* API that provides functions for working with characters and for creating |
|
* `Text` and `Parser` instances. |
|
* Copyright 2020 - 2021 Anton Tarasenko |
|
*------------------------------------------------------------------------------ |
|
* This file is part of Acedia. |
|
* |
|
* Acedia is free software: you can redistribute it and/or modify |
|
* it under the terms of the GNU General Public License as published by |
|
* the Free Software Foundation, version 3 of the License, or |
|
* (at your option) any later version. |
|
* |
|
* Acedia is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
* GNU General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU General Public License |
|
* along with Acedia. If not, see <https://www.gnu.org/licenses/>. |
|
*/ |
|
//  `dependson(Text)` is needed because this class uses types declared inside
//  `Text` (e.g. `Text.Character`, `Text.Formatting`).
class TextAPI extends AcediaObject
    dependson(Text);
|
|
|
/**
 *  Produces a `Formatting` structure in its default state, i.e. one that
 *  carries no instructions about how text should be formatted.
 *
 *  Cannot fail.
 *
 *  @return `Text.Formatting` value that was never assigned any fields.
 */
public final function Text.Formatting EmptyFormatting()
{
    //  Deliberately left untouched: the local is returned exactly as declared
    local Text.Formatting blankFormatting;
    return blankFormatting;
}
|
|
|
/**
 *  Produces a `Formatting` structure that describes text of a given color.
 *
 *  Cannot fail.
 *
 *  @param  color   Color to record inside the returned formatting.
 *  @return Formatting that is marked as colored and stores `color`.
 */
public final function Text.Formatting FormattingFromColor(Color color)
{
    local Text.Formatting result;
    result.color        = color;
    result.isColored    = true;
    return result;
}
|
|
|
/**
 *  Compares two `Text.Formatting` structures.
 *
 *  Two formattings are equal when neither is colored, or when both are
 *  colored and `_.color.AreEqualWithAlpha()` reports their colors as equal.
 *  Stored color of a formatting that is not marked as colored is ignored.
 *
 *  @param  formatting1 First formatting to compare.
 *  @param  formatting2 Second formatting to compare.
 *  @return `true` if formattings are equal and `false` otherwise.
 */
public final function bool IsFormattingEqual(
    Text.Formatting formatting1,
    Text.Formatting formatting2)
{
    if (formatting1.isColored && formatting2.isColored)
    {
        return _.color.AreEqualWithAlpha(   formatting1.color,
                                            formatting2.color);
    }
    //  At most one of them is colored here, so they only match if neither is
    return (!formatting1.isColored && !formatting2.isColored);
}
|
|
|
/**
 *  Checks if given character is a lower case letter.
 *
 *  Result describes whether character is precisely "lower case", not merely
 *  "not upper case": characters that have no case at all
 *  (like "#", "@" or "&") produce `false`.
 *
 *  Only the following ranges are recognized:
 *      ~ small Latin letters, U+0061 - U+007A;
 *      ~ small Cyrillic (Russian) letters, U+0430 - U+044F;
 *      ~ U+0451 (`ё`).
 *
 *  @param  character   Character to test for lower case.
 *  @return `true` if given character is lower case, `false` otherwise.
 */
public final function bool IsLower(Text.Character character)
{
    local int code;
    code = character.codePoint;
    return (    (code >= 0x0061 && code <= 0x007A)
            ||  (code >= 0x0430 && code <= 0x044F)
            ||  code == 0x0451);
}
|
|
|
/**
 *  Checks if given character is an upper case letter.
 *
 *  Result describes whether character is precisely "upper case", not merely
 *  "not lower case": characters that have no case at all
 *  (like "#", "@" or "&") produce `false`.
 *
 *  Only the following ranges are recognized:
 *      ~ capital Latin letters, U+0041 - U+005A;
 *      ~ capital Cyrillic (Russian) letters, U+0410 - U+042F;
 *      ~ U+0401 (`Ё`).
 *
 *  @param  character   Character to test for upper case.
 *  @return `true` if given character is upper case, `false` otherwise.
 */
public final function bool IsUpper(Text.Character character)
{
    local int code;
    code = character.codePoint;
    return (    (code >= 0x0041 && code <= 0x005A)
            ||  (code >= 0x0410 && code <= 0x042F)
            ||  code == 0x0401);
}
|
|
|
/**
 *  Checks if given character is one of the decimal digits "0" - "9"
 *  (code points U+0030 - U+0039).
 *
 *  @param  character   Character to check for being a digit.
 *  @return `true` if given character is a digit, `false` otherwise.
 */
public final function bool IsDigit(Text.Character character)
{
    return (character.codePoint >= 0x0030 && character.codePoint <= 0x0039);
}
|
|
|
/**
 *  Checks if given character belongs to the ASCII range
 *  (code points from `0` to `127`, inclusive).
 *
 *  @param  character   Character to check for being an ASCII character.
 *  @return `true` if given character is an ASCII character,
 *      `false` otherwise (including characters with negative code points).
 */
public final function bool IsASCII(Text.Character character)
{
    return (character.codePoint >= 0 && character.codePoint <= 0x007F);
}
|
|
|
/**
 *  Checks if given character is some kind of a white space symbol.
 *
 *  Recognized are both the "classic" set (space, tab, line feed,
 *  line tabulation, form feed, carriage return) and the characters listed
 *  below from Unicode's 'Separator, Space' category.
 *
 *  @param  character   Character to check for being a whitespace.
 *  @return `true` if given character is a whitespace, `false` otherwise.
 */
public final function bool IsWhitespace(Text.Character character)
{
    local int code;
    code = character.codePoint;
    //  Tab, line feed, line tabulation, form feed, carriage return
    if (code >= 0x0009 && code <= 0x000D) {
        return true;
    }
    //  En quad, em quad, en space, em space, three-per-em space,
    //  four-per-em space, six-per-em space, figure space, punctuation space,
    //  thin space, hair space
    if (code >= 0x2000 && code <= 0x200A) {
        return true;
    }
    //  Remaining, stand-alone code points
    switch (code)
    {
    case 0x0020:    // Space
    case 0x00A0:    // No-break space
    case 0x1680:    // Ogham space mark
    case 0x202F:    // Narrow no-break space
    case 0x205F:    // Medium mathematical space
    case 0x3000:    // Ideographic space
        return true;
    }
    return false;
}
|
|
|
/**
 *  Checks if passed character is one of the recognized quotation marks:
 *  double quote (U+0022), single quote (U+0027) or backtick (U+0060).
 *
 *  @param  character   Character to check for being a quotation mark.
 *  @return `true` if given character is one of the recognized
 *      quote symbols, `false` otherwise.
 */
public final function bool IsQuotationMark(Text.Character character)
{
    switch (character.codePoint)
    {
    case 0x0022:    // Double quote
    case 0x0027:    // Single quote
    case 0x0060:    // Backtick
        return true;
    }
    return false;
}
|
|
|
/**
 *  Extracts a single character from a given plain `string`.
 *
 *  Returned character only has its code point filled in.
 *  For extracting several characters, or characters from
 *  a colored/formatted `string`, we advise converting `string` into
 *  `Text` instead.
 *
 *  @param  source      `string`, from which to extract the character.
 *  @param  position    Position of the character to extract, counted
 *      from `0`.
 *  @return Character at given position in the given source.
 *      If specified position is invalid (`< 0` or `>= Len(source)`),
 *      result of `GetInvalidCharacter()` is returned instead.
 */
public final function Text.Character GetCharacter(
    string          source,
    optional int    position)
{
    local Text.Character extracted;
    if (position < 0 || position >= Len(source)) {
        return GetInvalidCharacter();
    }
    extracted.codePoint = Asc(Mid(source, position, 1));
    return extracted;
}
|
|
|
/**
 *  Builds a plain `string` out of a single character.
 *
 *  Formatting of the character is not used.
 *
 *  @param  character   Character that will be converted into a string.
 *  @return `string` that consists only of a given character,
 *      if `IsValidCharacter()` accepts it. Empty `string` otherwise.
 */
public final function string CharacterToString(Text.Character character)
{
    if (IsValidCharacter(character)) {
        return Chr(character.codePoint);
    }
    return "";
}
|
|
|
/**
 *  Converts given character into a number it represents in some base
 *  (from 2 to 36), i.e.:
 *      0 -> 0
 *      1 -> 1
 *      7 -> 7
 *      a -> 10
 *      e -> 14
 *      z -> 35
 *
 *  Only ASCII digits ("0" - "9") and ASCII Latin letters ("a" - "z",
 *  "A" - "Z") are ever treated as digits. Any other character, including
 *  non-ASCII characters whose Unicode lower case mapping happens to be
 *  an ASCII letter (e.g. U+212A "Kelvin sign"), is rejected.
 *
 *  @param  character   Character to convert into integer.
 *      Case does not matter, i.e. "a" and "A" will be treated the same.
 *  @param  base        Base to use for conversion.
 *      Valid values are from `2` to `36` (inclusive);
 *      If invalid value was specified (such as default `0`),
 *      the base of `36` is assumed, since that would allow for all possible
 *      characters to be converted.
 *  @return Non-negative integer value that is denoted by
 *      given character in given base;
 *      `-1` if given character does not represent anything in the given base.
 */
public final function int CharacterToInt(
    Text.Character  character,
    optional int    base)
{
    local int code, number;
    if (base < 2 || base > 36) {
        base = 36;
    }
    code = character.codePoint;
    //  Fold case for ASCII capitals only ("A" - "Z" -> "a" - "z").
    //  A Unicode-wide lower case conversion is intentionally avoided here:
    //  it could turn unrelated symbols into ASCII letters and make them
    //  pass as digits.
    if (code >= 0x0041 && code <= 0x005a) {
        code += 0x0020;
    }
    //  digits
    if (code >= 0x0030 && code <= 0x0039) {
        number = code - 0x0030;
    }
    //  a-z
    else if (code >= 0x0061 && code <= 0x007a) {
        number = code - 0x0061 + 10;
    }
    else {
        return -1;
    }
    //  Digit exists, but is too large for the requested base
    if (number >= base) {
        return -1;
    }
    return number;
}
|
|
|
/**
 *  Checks if code point stored in a given `character` is exactly
 *  the given `codePoint`.
 *
 *  Formatting of the `character` is not taken into account.
 *
 *  @param  character   Character to check.
 *  @param  codePoint   Code point to check against.
 *  @return `true` if `character`'s code point equals `codePoint`
 *      and `false` otherwise.
 */
public final function bool IsCodePoint(Text.Character character, int codePoint)
{
    return (codePoint == character.codePoint);
}
|
|
|
/**
 *  Extracts formatting of the given character.
 *
 *  @param  character   Character to get formatting of.
 *  @return Formatting stored in the given character.
 *      For characters rejected by `IsValidCharacter()` a never-assigned
 *      (default) formatting is returned instead, regardless of what
 *      the character itself stores.
 */
public final function Text.Formatting GetCharacterFormatting(
    Text.Character character)
{
    //  Never assigned, serves as the fallback result
    local Text.Formatting noFormatting;
    if (!IsValidCharacter(character)) {
        return noFormatting;
    }
    return character.formatting;
}
|
|
|
/**
 *  Produces a copy of a given character with its formatting replaced.
 *
 *  @param  character       Character to change formatting of.
 *  @param  newFormatting   New formatting to set.
 *  @return Same character as `character`, but with `newFormatting` as its
 *      formatting. Characters rejected by `IsValidCharacter()` are returned
 *      unaltered.
 */
public final function Text.Character SetFormatting(
    Text.Character  character,
    Text.Formatting newFormatting)
{
    if (IsValidCharacter(character)) {
        character.formatting = newFormatting;
    }
    return character;
}
|
|
|
/**
 *  Resolves the color a given `Character` should be displayed with.
 *
 *  `Character`s that are not marked as colored are considered to have
 *  a "default" color, which depends on the context; caller supplies that
 *  color through `defaultColor`.
 *
 *  Validity of the `character` is not checked by this method.
 *
 *  @param  character       `Character`, which color to return.
 *  @param  defaultColor    Color to use for characters that are not colored.
 *  @return Color stored in `character`'s formatting if it is marked as
 *      colored, `defaultColor` otherwise.
 */
public final function Color GetCharacterColor(
    Text.Character  character,
    optional Color  defaultColor)
{
    if (!character.formatting.isColored) {
        return defaultColor;
    }
    return character.formatting.color;
}
|
|
|
/**
 *  Returns a character that is considered invalid.
 *
 *  Returned value is not the only possible invalid character:
 *  `IsValidCharacter()` rejects every character with negative code point.
 *
 *  @return Character with code point `-1` and never-assigned formatting.
 */
public final function Text.Character GetInvalidCharacter()
{
    local Text.Character invalidCharacter;
    invalidCharacter.codePoint = -1;
    return invalidCharacter;
}
|
|
|
/**
 *  Checks if given character is valid.
 *
 *  A character is valid when its code point is not negative;
 *  formatting plays no role in this check.
 *
 *  @param  character   Character to check.
 *  @return `true` if passed character is valid and `false` otherwise.
 */
public final function bool IsValidCharacter(Text.Character character)
{
    return !(character.codePoint < 0);
}
|
|
|
/**
 *  Checks if given characters are equal, optionally ignoring their case
 *  and/or taking their formatting into account.
 *
 *  Characters are lower-cased (with `ToLower()`) before comparison only when
 *  `SCASE_INSENSITIVE` is passed as `caseSensitivity`.
 *  Formatting (color of the characters) is compared only when
 *  `SFORM_SENSITIVE` is passed as `formatSensitivity`.
 *
 *  Invalid characters (negative code point) are always considered equal to
 *  each other (precise value of their `codePoint` or `formatting` is
 *  irrelevant) and never equal to a valid character.
 *
 *  @param  character1          Character to compare.
 *  @param  character2          Character to compare.
 *  @param  caseSensitivity     Defines whether comparison should be
 *      case-sensitive.
 *  @param  formatSensitivity   Defines whether comparison should be
 *      sensitive for color information.
 *  @return `true` if given characters are considered equal,
 *      `false` otherwise.
 */
public final function bool AreEqual(
    Text.Character                  character1,
    Text.Character                  character2,
    optional Text.CaseSensitivity   caseSensitivity,
    optional Text.FormatSensitivity formatSensitivity)
{
    local bool firstIsValid, secondIsValid;
    firstIsValid    = (character1.codePoint >= 0);
    secondIsValid   = (character2.codePoint >= 0);
    //  With at least one invalid character, the only way to be equal is
    //  for both of them to be invalid
    if (!firstIsValid || !secondIsValid) {
        return (firstIsValid == secondIsValid);
    }
    if (caseSensitivity == SCASE_INSENSITIVE)
    {
        character1 = ToLower(character1);
        character2 = ToLower(character2);
    }
    if (formatSensitivity == SFORM_SENSITIVE)
    {
        if (!IsFormattingEqual(character1.formatting, character2.formatting)) {
            return false;
        }
    }
    return (character1.codePoint == character2.codePoint);
}
|
|
|
/**
 *  Converts given character into its lower case version, as reported by
 *  `class'UnicodeData'.static.ToLowerCodePoint()`.
 *
 *  Formatting of the character is preserved.
 *
 *  @param  character   Character to convert into lower case.
 *  @return Given character with its code point replaced by the lower case
 *      one. If `ToLowerCodePoint()` returns a negative value (presumably
 *      meaning that no lower case version is defined, like for "&" or "!"),
 *      given character is returned unchanged.
 */
public final function Text.Character ToLower(Text.Character character)
{
    local int loweredCodePoint;
    loweredCodePoint =
        class'UnicodeData'.static.ToLowerCodePoint(character.codePoint);
    if (loweredCodePoint < 0) {
        return character;
    }
    character.codePoint = loweredCodePoint;
    return character;
}
|
|
|
/**
 *  Converts given character into its upper case version, as reported by
 *  `class'UnicodeData'.static.ToUpperCodePoint()`.
 *
 *  Formatting of the character is preserved.
 *
 *  @param  character   Character to convert into upper case.
 *  @return Given character with its code point replaced by the upper case
 *      one. If `ToUpperCodePoint()` returns a negative value (presumably
 *      meaning that no upper case version is defined, like for "&" or "!"),
 *      given character is returned unchanged.
 */
public final function Text.Character ToUpper(Text.Character character)
{
    local int raisedCodePoint;
    raisedCodePoint =
        class'UnicodeData'.static.ToUpperCodePoint(character.codePoint);
    if (raisedCodePoint < 0) {
        return character;
    }
    character.codePoint = raisedCodePoint;
    return character;
}
|
|
|
/**
 *  Splits a single `Text` into an array of parts.
 *      First character of the `Text` is treated as a separator, with which
 *  the rest of the `Text` is split:
 *      ~ "/ab/c/d" => ["ab", "c", "d"]
 *      ~ "zWordzomgzz" => ["Word", "omg", "", ""]
 *
 *  This method is useful to easily prepare array of words for `Parser`'s
 *  methods.
 *
 *  @param  source  `Text` that contains separator with parts to
 *      separate and extract.
 *  @return Separated parts. Empty array if passed `source` was `none` or
 *      empty, otherwise contains at least one element.
 */
public final function array<MutableText> Parts(Text source)
{
    local array<MutableText> parts;
    if (source == none || source.GetLength() <= 0) {
        return parts;
    }
    parts = source.SplitByCharacter(source.GetCharacter(0));
    //  `source` starts with the separator, therefore the split is expected to
    //  always begin with an empty substring: release and drop it.
    parts[0].FreeSelf();
    parts.Remove(0, 1);
    return parts;
}
|
|
|
/**
 *  Allocates a new `MutableText`.
 *
 *  This is a shortcut, same result can be achieved by
 *  `_.memory.Allocate(class'MutableText')`.
 *
 *  @return New instance of `MutableText`, exactly as returned by
 *      the allocation (no content is appended to it).
 */
public final function MutableText Empty()
{
    local MutableText newText;
    newText = MutableText(_.memory.Allocate(class'MutableText'));
    return newText;
}
|
|
|
/**
 *  Creates a `Text` out of a given plain `string`.
 *
 *  Simply forwards to `class'Text'.static.ConstFromPlainString()`.
 *  To create `MutableText` instead use `FromStringM()` method.
 *
 *  @param  source  Plain `string` to build the `Text` from.
 *  @return Whatever `ConstFromPlainString()` returns for `source`.
 */
public final function Text FromString(string source)
{
    local Text result;
    result = class'Text'.static.ConstFromPlainString(source);
    return result;
}
|
|
|
/**
 *  Creates a `MutableText` out of a given plain `string`.
 *
 *  Allocates a new `MutableText` and appends `source` to it with
 *  `AppendPlainString()`.
 *  To create immutable `Text` instead use `FromString()` method.
 *
 *  @param  source  Plain `string` to append to the new `MutableText`.
 *  @return Result of the `AppendPlainString()` call on the newly allocated
 *      `MutableText`.
 */
public final function MutableText FromStringM(string source)
{
    return MutableText(_.memory.Allocate(class'MutableText'))
        .AppendPlainString(source);
}
|
|
|
/**
 *  Creates a `Text` out of a given colored `string`.
 *
 *  Simply forwards to `class'Text'.static.ConstFromColoredString()`.
 *  To create `MutableText` instead use `FromColoredStringM()` method.
 *
 *  @param  source  Colored `string` to build the `Text` from.
 *  @return Whatever `ConstFromColoredString()` returns for `source`.
 */
public final function Text FromColoredString(string source)
{
    local Text result;
    result = class'Text'.static.ConstFromColoredString(source);
    return result;
}
|
|
|
/**
 *  Creates a `MutableText` out of a given colored `string`.
 *
 *  Allocates a new `MutableText` and appends `source` to it with
 *  `AppendColoredString()`.
 *  To create immutable `Text` instead use `FromColoredString()` method.
 *
 *  @param  source  Colored `string` to append to the new `MutableText`.
 *  @return Result of the `AppendColoredString()` call on the newly allocated
 *      `MutableText`.
 */
public final function MutableText FromColoredStringM(string source)
{
    return MutableText(_.memory.Allocate(class'MutableText'))
        .AppendColoredString(source);
}
|
|
|
/**
 *  Creates a `Text` out of a given formatted `string`.
 *
 *  Simply forwards to `class'Text'.static.ConstFromFormattedString()`.
 *  To create `MutableText` instead use `FromFormattedStringM()` method.
 *
 *  @param  source  Formatted `string` to build the `Text` from.
 *  @return Whatever `ConstFromFormattedString()` returns for `source`.
 */
public final function Text FromFormattedString(string source)
{
    local Text result;
    result = class'Text'.static.ConstFromFormattedString(source);
    return result;
}
|
|
|
/**
 *  Creates a `MutableText` out of a given formatted `string`.
 *
 *  Allocates a new `MutableText` and appends `source` to it with
 *  `AppendFormattedString()`.
 *  To create immutable `Text` instead use `FromFormattedString()` method.
 *
 *  @param  source  Formatted `string` to append to the new `MutableText`.
 *  @return Result of the `AppendFormattedString()` call on the newly
 *      allocated `MutableText`.
 */
public final function MutableText FromFormattedStringM(string source)
{
    return MutableText(_.memory.Allocate(class'MutableText'))
        .AppendFormattedString(source);
}
|
|
|
/**
 *  Allocates a new `Parser` without initializing it with any content.
 *
 *  This is a shortcut, same result can be achieved by
 *  `_.memory.Allocate(class'Parser')`.
 *
 *  @return New `Parser`, exactly as returned by the allocation.
 */
public final function Parser NewParser()
{
    local Parser result;
    result = Parser(_.memory.Allocate(class'Parser'));
    return result;
}
|
|
|
/**
 *  Creates a new parser (via `NewParser()`) and calls `Initialize()` on it
 *  with the given `Text`.
 *
 *  @param  source  `Text` that is passed to the `Parser`'s `Initialize()`
 *      method; returned `Parser` is meant to parse its contents.
 *      `none` is passed along as-is, without any check on this side.
 *  @return `Parser` obtained from `NewParser()`, after `Initialize(source)`
 *      was called on it.
 */
public final function Parser Parse(Text source)
{
    local Parser result;
    result = NewParser();
    result.Initialize(source);
    return result;
}
|
|
|
/**
 *  Creates a new parser (via `NewParser()`) and calls `InitializeS()` on it
 *  with the given plain `string`.
 *
 *  @param  source  Plain `string` that is passed to the `Parser`'s
 *      `InitializeS()` method; returned `Parser` is meant to parse it.
 *  @return `Parser` obtained from `NewParser()`, after `InitializeS(source)`
 *      was called on it.
 */
public final function Parser ParseString(string source)
{
    local Parser result;
    result = NewParser();
    result.InitializeS(source);
    return result;
}
|
|
|
//  TODO: remove this
/**
 *  Stub that is scheduled for removal: it does not compute anything.
 *
 *  @param  source  Ignored.
 *  @return Always `0`.
 */
public final function int GetHash(string source)
{
    return 0;
}
|
|
|
defaultproperties
{
}