From dbd81e04ef2748420637ab5ee249469cec976dad Mon Sep 17 00:00:00 2001 From: Anton Tarasenko Date: Sun, 18 Jul 2021 17:10:18 +0700 Subject: [PATCH] Initial Avarice feature commit --- sources/Avarice/Avarice.uc | 80 ++++++ sources/Avarice/AvariceAPI.uc | 54 ++++ sources/Avarice/AvariceClient.uc | 88 ++++++ sources/Avarice/AvariceMessage.uc | 93 +++++++ sources/Avarice/AvariceTCPLink.uc | 170 ++++++++++++ .../Avarice/Tests/TEST_UTF8EncoderDecoder.uc | Bin 0 -> 16540 bytes sources/Avarice/Utf8Decoder.uc | 260 ++++++++++++++++++ sources/Avarice/Utf8Encoder.uc | 121 ++++++++ sources/Global.uc | 2 + sources/Manifest.uc | 2 + 10 files changed, 870 insertions(+) create mode 100644 sources/Avarice/Avarice.uc create mode 100644 sources/Avarice/AvariceAPI.uc create mode 100644 sources/Avarice/AvariceClient.uc create mode 100644 sources/Avarice/AvariceMessage.uc create mode 100644 sources/Avarice/AvariceTCPLink.uc create mode 100644 sources/Avarice/Tests/TEST_UTF8EncoderDecoder.uc create mode 100644 sources/Avarice/Utf8Decoder.uc create mode 100644 sources/Avarice/Utf8Encoder.uc diff --git a/sources/Avarice/Avarice.uc b/sources/Avarice/Avarice.uc new file mode 100644 index 0000000..1209199 --- /dev/null +++ b/sources/Avarice/Avarice.uc @@ -0,0 +1,80 @@ +/** + * + * Copyright 2021 Anton Tarasenko + *------------------------------------------------------------------------------ + * This file is part of Acedia. + * + * Acedia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 3 of the License, or + * (at your option) any later version. + * + * Acedia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Acedia. If not, see <https://www.gnu.org/licenses/>.
+ */
+class Avarice extends Feature
+    config(AcediaAvarice);
+
+//  Feature that, when enabled, opens one `AvariceTCPLink` connection for
+//  every entry of the `link` config array and, when disabled, destroys all
+//  `AvariceTCPLink` actors found on the level.
+
+//  Describes a single configured link.
+struct AvariceLink
+{
+    //  Name of the link: passed to `AvariceTCPLink.Connect()`
+    var string name;
+    //  Address to connect to, expected in the "host:port" form
+    //  (see `ParseAddress()`)
+    var string host;
+};
+
+//  Links that will be opened in `OnEnabled()`
+var private config array<AvariceLink> link;
+
+var private LoggerAPI.Definition errorBadAddress;
+
+//  Opens a connection for every configured link.
+//  Entries whose address cannot be parsed are reported and skipped:
+//  connecting them would reuse `host` / `port` values left over from
+//  a previous iteration (or zero values for the first entry).
+protected function OnEnabled()
+{
+    local int               i;
+    local string            host;
+    local int               port;
+    local AvariceTCPLink    nextTCPLink;
+    for (i = 0; i < link.length; i += 1)
+    {
+        if (!ParseAddress(link[i].host, host, port))
+        {
+            //  Message talks about the address, so report the address
+            _.logger.Auto(errorBadAddress).Arg(_.text.FromString(link[i].host));
+            continue;
+        }
+        nextTCPLink = AvariceTCPLink(_.memory.Allocate(class'AvariceTCPLink'));
+        if (nextTCPLink != none) {
+            nextTCPLink.Connect(link[i].name, host, port);
+        }
+    }
+}
+
+//  Destroys every `AvariceTCPLink` actor that exists on the current level.
+protected function OnDisabled()
+{
+    local LevelInfo         level;
+    local AvariceTCPLink    nextTCPLink;
+    level = _.unreal.GetLevel();
+    foreach level.DynamicActors(class'AvariceTCPLink', nextTCPLink) {
+        nextTCPLink.Destroy();
+    }
+}
+
+//  Splits `address` of the form "host:port" (surrounding whitespace is
+//  skipped) into its `host` and `port` parts.
+//  Returns `true` only if the whole string was consumed without parsing
+//  errors; on `false` contents of `host` and `port` must not be used.
+private final function bool ParseAddress(
+    string      address,
+    out string  host,
+    out int     port)
+{
+    local bool      success;
+    local Parser    parser;
+    parser = _.text.ParseString(address);
+    parser.Skip()
+        .MUntilS(host, _.text.GetCharacter(":"))
+        .MatchS(":")
+        .MUnsignedInteger(port)
+        .Skip();
+    success = parser.Ok() && parser.GetRemainingLength() == 0;
+    parser.FreeSelf();
+    return success;
+}
+
+defaultproperties
+{
+    errorBadAddress = (l=LOG_Error,m="Cannot parse address \"%1\"")
+}
\ No newline at end of file
diff --git a/sources/Avarice/AvariceAPI.uc b/sources/Avarice/AvariceAPI.uc
new file mode 100644
index 0000000..1f06036
--- /dev/null
+++ b/sources/Avarice/AvariceAPI.uc
@@ -0,0 +1,54 @@
+/**
+ * Copyright 2020 - 2021 Anton Tarasenko
+ *------------------------------------------------------------------------------
+ * This file is part of Acedia.
+ *
+ * Acedia is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Acedia is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Acedia. If not, see <https://www.gnu.org/licenses/>.
+ */
+class AvariceAPI extends AcediaObject;
+
+/**
+ * Builds an `AvariceMessage` out of the JSON object stored in `message`.
+ *
+ * The object must contain keys "i" and "g": their text values become
+ * the message's ID and group. Value stored under "p" (if any) is taken out
+ * of the parsed object and becomes the message's `data`.
+ *
+ * @param   message `Text` with the JSON object to convert.
+ * @return  New `AvariceMessage`; `none` if `message == none` or parsed
+ *      object lacks "i" or "g" keys.
+ */
+public final function AvariceMessage MessageFromText(Text message)
+{
+    local Parser            jsonParser;
+    local AssociativeArray  fields;
+    local AvariceMessage    newMessage;
+    if (message == none) {
+        return none;
+    }
+    jsonParser = _.text.Parse(message);
+    fields = _.json.ParseObjectWith(jsonParser);
+    jsonParser.FreeSelf();
+    //  `newMessage` stays `none` unless all required keys are present
+    if (HasNecessaryMessageKeys(fields))
+    {
+        newMessage = AvariceMessage(_.memory.Allocate(class'AvariceMessage'));
+        newMessage.SetID(fields.GetText(P("i")));
+        newMessage.SetGroup(fields.GetText(P("g")));
+        newMessage.data = fields.TakeItem(P("p"));
+    }
+    _.memory.Free(fields);
+    return newMessage;
+}
+
+//  `true` iff `message` exists and has both "i" and "g" keys
+private final function bool HasNecessaryMessageKeys(AssociativeArray message)
+{
+    return  message != none
+        &&  message.HasKey(P("i"))
+        &&  message.HasKey(P("g"));
+}
+
+defaultproperties
+{
+}
\ No newline at end of file
diff --git a/sources/Avarice/AvariceClient.uc b/sources/Avarice/AvariceClient.uc
new file mode 100644
index 0000000..4b37733
--- /dev/null
+++ b/sources/Avarice/AvariceClient.uc
@@ -0,0 +1,88 @@
+class AvariceClient extends AcediaObject;
+
+//  States of the byte-by-byte reader implemented in `PushByte()`
+enum AvariceClientState
+{
+    ACS_Waiting,
+    ACS_ReadingID,
+    ACS_ReadingLength,
+    ACS_ReadingPayload,
+    ACS_Invalid
+};
+
+var private int
currentID;
+var private int currentMessageLength;
+var private array<byte> currentPayload;
+
+var private AvariceClientState currentState;
+var private int bytesLeftToRead;
+var private byte buffer[255];
+//  NOTE(review): element type was lost in this copy of the patch;
+//  `byte` is assumed by analogy with `buffer` - confirm.
+var private array<byte> longBuffer;
+var private int pendingBytes;
+
+/**
+ * Feeds next byte of the incoming stream into the reader.
+ *
+ * As implemented, a message consists of a 2-byte ID (high byte first),
+ * a 2-byte payload length (high byte first) and the payload itself.
+ * `0` byte always returns the reader into the `ACS_Waiting` state.
+ *
+ * NOTE(review): since `0` is handled before anything else, ID and length
+ * bytes can never be zero with the current logic - confirm that protocol
+ * intends that.
+ *
+ * @param   nextByte    Next byte of the stream.
+ */
+public final function PushByte(byte nextByte)
+{
+    if (nextByte == 0)
+    {
+        if (bytesLeftToRead > 0)
+        {
+            // ACK for short message (with id)
+        }
+        currentState = ACS_Waiting;
+        ResetBuffer();
+        return;
+    }
+    if (currentState == ACS_Invalid)
+    {
+        // ACK of invalid message's end
+        return;
+    }
+    if (currentState == ACS_Waiting)
+    {
+        // New message starts here: bytes of the previous payload must not
+        // be mixed with the new one
+        currentPayload.length = 0;
+        currentID = nextByte;
+        currentID = currentID << 8;
+        currentState = ACS_ReadingID;
+    }
+    else if (currentState == ACS_ReadingID)
+    {
+        currentID += nextByte;
+        currentState = ACS_ReadingLength;
+        bytesLeftToRead = 2;
+    }
+    else if (currentState == ACS_ReadingLength)
+    {
+        bytesLeftToRead -= 1;
+        if (bytesLeftToRead > 0)
+        {
+            // High byte of the length
+            currentMessageLength = nextByte;
+            currentMessageLength = currentMessageLength << 8;
+        }
+        else
+        {
+            // Low byte of the length
+            currentMessageLength += nextByte;
+            currentState = ACS_ReadingPayload;
+            bytesLeftToRead = currentMessageLength;
+        }
+    }
+    else if (currentState == ACS_ReadingPayload)
+    {
+        currentPayload[currentPayload.length] = nextByte;
+        // Decode payload into `AvariceMessage`
+        // Send messages via Acedia's signals
+        bytesLeftToRead -= 1;
+        if (bytesLeftToRead == 0)
+        {
+            currentState = ACS_Waiting;
+            // ACK into buffer
+        }
+    }
+}
+
+//  Drops all data accumulated for the message that was being read
+private final function ResetBuffer()
+{
+    pendingBytes = 0;
+    bytesLeftToRead = 0;
+    longBuffer.length = 0;
+    currentPayload.length = 0;
+}
+
+defaultproperties
+{
+}
\ No newline at end of file
diff --git a/sources/Avarice/AvariceMessage.uc b/sources/Avarice/AvariceMessage.uc
new file mode 100644
index 0000000..42551d2
--- /dev/null
+++ b/sources/Avarice/AvariceMessage.uc
@@ -0,0 +1,93 @@
+class AvariceMessage extends AcediaObject;
+
+var private Text messageID;
+var private Text messageGroup;
+
+var public AcediaObject data;
+
+var private AssociativeArray messageTemplate;
+
+//  Creates the shared (stored in `default`) template used by `ToText()`
+public static function StaticConstructor()
+{
+    if (StaticConstructorGuard()) {
+        return;
+    }
+    super.StaticConstructor();
+
+    default.messageTemplate = __().collections.EmptyAssociativeArray();
+    ResetTemplate(default.messageTemplate);
+}
+
+//  Releases ID, group and `data` of the message
+protected function Finalizer()
+{
+    __().memory.Free(messageID);
+    messageID = none;
+    __().memory.Free(messageGroup);
+    messageGroup = none;
+    __().memory.Free(data);
+    data = none;
+}
+
+//  Stores `none` under each of the "i", "g" and "p" keys of `template`
+private static final function ResetTemplate(AssociativeArray template)
+{
+    if (template != none)
+    {
+        template.SetItem(P("i"), none);
+        template.SetItem(P("g"), none);
+        template.SetItem(P("p"), none);
+    }
+}
+
+/**
+ * Replaces message's ID with a copy of `id`.
+ *
+ * @param   id  New ID; `none` clears the ID.
+ */
+public final function SetID(Text id)
+{
+    _.memory.Free(messageID);
+    if (id == none)
+    {
+        messageID = none;
+        return;
+    }
+    messageID = id.Copy();
+}
+
+/**
+ * @return  Copy of the message's ID, `none` if it was not set.
+ */
+public final function Text GetID()
+{
+    if (messageID == none) {
+        return none;
+    }
+    return messageID.Copy();
+}
+
+/**
+ * Replaces message's group with a copy of `group`.
+ *
+ * @param   group   New group; `none` clears the group.
+ */
+public final function SetGroup(Text group)
+{
+    _.memory.Free(messageGroup);
+    if (group == none)
+    {
+        messageGroup = none;
+        return;
+    }
+    messageGroup = group.Copy();
+}
+
+/**
+ * @return  Copy of the message's group, `none` if it was not set.
+ */
+public final function Text GetGroup()
+{
+    if (messageGroup == none) {
+        return none;
+    }
+    return messageGroup.Copy();
+}
+
+/**
+ * Prints the message as JSON: ID goes under the "i" key, group under "g"
+ * and `data` (when it is not `none`) under "p".
+ *
+ * @return  JSON representation of the message; `none` if either ID or
+ *      group is not set.
+ */
+public final function MutableText ToText()
+{
+    local AssociativeArray  sharedTemplate;
+    local MutableText       jsonText;
+    if (messageID == none || messageGroup == none) {
+        return none;
+    }
+    //  Template is shared between all messages: fill it, print it and
+    //  then immediately clear it again
+    sharedTemplate = default.messageTemplate;
+    sharedTemplate.SetItem(P("i"), messageID);
+    sharedTemplate.SetItem(P("g"), messageGroup);
+    if (data != none) {
+        sharedTemplate.SetItem(P("p"), data);
+    }
+    jsonText = _.json.Print(sharedTemplate);
+    ResetTemplate(sharedTemplate);
+    return jsonText;
+}
+
+defaultproperties
+{
+}
\ No newline at end of file
diff --git a/sources/Avarice/AvariceTCPLink.uc b/sources/Avarice/AvariceTCPLink.uc
new file mode 100644
index 0000000..335a1c8
--- /dev/null
+++ b/sources/Avarice/AvariceTCPLink.uc
@@ -0,0 +1,170 @@
+class AvariceTcpLink extends TcpLink
+    dependson(LoggerAPI);
+
+var private Global _;
+
+var
private string linkName;
+var private string linkHost;
+var private int linkPort;
+var private IpAddr remoteAddress;
+var private int ttt;
+
+var private bool didWorkLastTick;
+
+//  NOTE(review): element type was lost in this copy of the patch;
+//  `byte` is assumed - confirm.
+var private array<byte> buffer;
+
+var private Utf8Encoder encoder;
+var private Utf8Decoder decoder;
+
+var private LoggerAPI.Definition infoSuccess;
+var private LoggerAPI.Definition fatalBadPort;
+var private LoggerAPI.Definition fatalCannotBindPort;
+var private LoggerAPI.Definition fatalCannotResolveHost;
+var private LoggerAPI.Definition fatalCannotConnect;
+
+/**
+ * Prepares the link (binary mode, manual receiving, UTF8 encoder/decoder)
+ * and starts connecting to `host`:`port`.
+ *
+ * If `host` is not a numeric address, it is resolved first and connection
+ * is opened from the `Resolved()` event.
+ *
+ * @param   name    Name of the link, only used for logging.
+ * @param   host    Host to connect to.
+ * @param   port    Port to connect to, must be positive.
+ * @return  `false` if `port` is invalid, local port could not be bound or
+ *      connection to a numeric address could not be opened;
+ *      `true` otherwise (including when host resolution was started).
+ */
+public final function bool Connect(string name, string host, int port)
+{
+    // Apparently `TcpLink` ignores default values for these variables,
+    // so we set them here
+    linkMode = MODE_Binary;
+    receiveMode = RMODE_Manual;
+    _ = class'Global'.static.GetInstance();
+    encoder = Utf8Encoder(_.memory.Allocate(class'Utf8Encoder'));
+    decoder = Utf8Decoder(_.memory.Allocate(class'Utf8Decoder'));
+    linkName = name;
+    linkHost = host;
+    linkPort = port;
+    if (port <= 0)
+    {
+        _.logger.Auto(fatalBadPort)
+            .ArgInt(port)
+            .Arg(_.text.FromString(linkName));
+        return false;
+    }
+    if (BindPort(, true) <= 0)
+    {
+        // This message has a single placeholder: the link's name
+        _.logger.Auto(fatalCannotBindPort).Arg(_.text.FromString(linkName));
+        return false;
+    }
+    StringToIpAddr(host, remoteAddress);
+    remoteAddress.port = port;
+    if (remoteAddress.addr == 0)
+    {
+        Resolve(host);
+        return true;
+    }
+    return OpenAddress();
+}
+
+//  Host name was resolved: remember the address and open the connection
+event Resolved(IpAddr resolvedAddress)
+{
+    remoteAddress.addr = resolvedAddress.addr;
+    OpenAddress();
+}
+
+//  Starts opening connection to `remoteAddress`.
+//  Returns `true` iff `OpenNoSteam()` call has succeeded.
+private final function bool OpenAddress()
+{
+    if (!OpenNoSteam(remoteAddress))
+    {
+        _.logger.Auto(fatalCannotConnect).Arg(_.text.FromString(linkName));
+        return false;
+    }
+    _.logger.Auto(infoSuccess).Arg(_.text.FromString(linkName));
+    return true;
+}
+
+event ResolveFailed()
+{
+    // This message has two placeholders: host and the link's name
+    _.logger.Auto(fatalCannotResolveHost)
+        .Arg(_.text.FromString(linkHost))
+        .Arg(_.text.FromString(linkName));
+    // !Shut down!
+}
+
+/**
+ * Reads all pending bytes into the UTF8 decoder and then answers every
+ * completely decoded message by sending it back (parsed into
+ * an `AvariceMessage` and printed again), terminated by a `0` byte.
+ *
+ * A tick that follows a tick in which data was read is skipped.
+ */
+event Tick(float delta)
+{
+    local array<byte>       toSend;
+    local AvariceMessage    nextAMessage;
+    local MutableText       nextMessage;
+    local Text              flushCommand;
+    local int               i, j, dataRead, totalRead;
+    local byte              data[255];
+    if (didWorkLastTick)
+    {
+        didWorkLastTick = false;
+        return;
+    }
+    if (!IsDataPending()) {
+        return;
+    }
+    // Read until nothing is left; `dataRead` is always non-positive after
+    // this loop, so amount of work done is tracked in `totalRead`
+    while (true)
+    {
+        dataRead = ReadBinary(255, data);
+        if (dataRead <= 0) {
+            break;
+        }
+        totalRead += dataRead;
+        for (i = 0; i < dataRead; i += 1)
+        {
+            ttt += 1;
+            decoder.PushByte(data[i]);
+        }
+    }
+    // NOTE(review): `ttt` is never reset, so once it reaches 4095 "FLUSH"
+    // is sent on every tick that gets this far - confirm that is intended.
+    if (ttt >= 4095)
+    {
+        flushCommand = _.text.FromString("FLUSH");
+        toSend = encoder.Encode(flushCommand);
+        _.memory.Free(flushCommand);
+        for (i = 0; i < toSend.length; i += 1) {
+            data[i] = toSend[i];
+        }
+        data[toSend.length] = 0;
+        SendBinary(toSend.length + 1, data);
+    }
+    if (totalRead > 0) {
+        didWorkLastTick = true;
+    }
+    // Obtain!
+    nextMessage = decoder.PopText();
+    while (nextMessage != none)
+    {
+        Log("SIZE:" @ nextMessage.GetLength() @ ttt);
+        StopWatch(false);
+        nextAMessage = _.avarice.MessageFromText(nextMessage);
+        nextMessage.FreeSelf();
+        nextMessage = none;
+        // `MessageFromText()` returns `none` for anything that is not
+        // a valid message: there is nothing to send back in that case
+        if (nextAMessage != none)
+        {
+            nextMessage = nextAMessage.ToText();
+            _.memory.Free(nextAMessage);
+            nextAMessage = none;
+        }
+        if (nextMessage != none)
+        {
+            toSend = encoder.Encode(nextMessage);
+            toSend[toSend.length] = 0;
+            // Send in chunks of at most 255 bytes
+            j = 0;
+            for (i = 0; i < toSend.length; i += 1)
+            {
+                data[j] = toSend[i];
+                j += 1;
+                if (j >= 255)
+                {
+                    j = 0;
+                    SendBinary(255, data);
+                }
+            }
+            if (j > 0) {
+                SendBinary(j, data);
+            }
+            nextMessage.FreeSelf();
+        }
+        nextMessage = decoder.PopText();
+        StopWatch(true);
+    }
+}
+
+event Opened()
+{
+    //Log("[TestTcp] Accepted!");
+    LOG("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
+}
+
+event Closed()
+{
+    //Log("[TestTcp] Closed!");
+}
+
+defaultproperties
+{
+    infoSuccess = (l=LOG_Info,m="Successfully started Avarice link \"%1\"")
+    fatalBadPort = (l=LOG_Fatal,m="Bad port \"%1\" specified for Avarice link \"%2\"")
+    fatalCannotBindPort = (l=LOG_Fatal,m="Cannot bind port for Avarice link \"%1\"")
+    fatalCannotResolveHost = (l=LOG_Fatal,m="Cannot resolve host \"%1\" for Avarice link \"%2\"")
+    fatalCannotConnect = (l=LOG_Fatal,m="Connection for Avarice link
\"%1\" was rejected") +} \ No newline at end of file diff --git a/sources/Avarice/Tests/TEST_UTF8EncoderDecoder.uc b/sources/Avarice/Tests/TEST_UTF8EncoderDecoder.uc new file mode 100644 index 0000000000000000000000000000000000000000..950d0daa77987d328e49cd106741cee99777b141 GIT binary patch literal 16540 zcmeHOTWDOz8J@FRxp9J9gK-}WK3U05EI0CMb&c#gwq(hQ4T@|mxhAz8Yjs&$Sdo;~ z%JQXl3N7?Xq3MfLNSnqgen|;U`_MokZJ|);gCEimm)Z(7A*Q4+g+7(i^!w)D(VW>m zd(p1e7+MZz_spD`fByOJ|ID2Ke5zX17jSJ*kE$5{W4MOZq{`rbR;BQpL1`Yp^LS3< zd0HiP-GqKOi{DXo95pG`t@`nP9Q`KnpGJKQZD&vlwmFEJV4G&VT>$h+-EK@>Lb+F6 zK+m+ALd!`Ncd#Rdy!tTC99py0S#=H=#8jI~;BLiJFGk6r#SETDG_0f^V@91vt(omE z_3&N>N8*1Db0qy!_@4$vV%hSXo|)xUucp@o-X;T7vyg)C4UClLDq+rpGAU4PQA3?n zdr)@?bqn|(M;&QN2_Sq*1Svm;x0Db9j+%Rs3@s9ME!U&cR=QIYS11Tfe3D*|Ua#~YVRJmvne>>{a}44& z1=*OxNK<%6tVL4Onm!p#AyVFA@tit{G5RpRjMJ+Q0G~mP6R{8}9nj?=v>nDpP9H%p z;!61^9^l&Gon zzs9;)dJL;3wNvAIMgPi-Xme)KUaW1npBLG*K3(uJ{b}RvyO??gScJoA0#D9pEaI5y zByRHf61dq5EDRo;R}&x_j_xP`Z3kwvGs^*kOCKOIE)bqJsYXbHaFh0rvTCt4HMCol z67d=AD>S86CV^udP;$pHP|xHKPBAzp}m6KN_fOSBmx z`9_w4_HPGh%D9-g(87$O#CD8YsUg)!UC^TS1HREeYPe|URZS1hr!t!%TC+O&4%hmi z&WYR;E7t}c2Ce3yE!6x*9rIZ&3hJe$U$LAdmE|(&zo_M!F^0r9uJ07T6sFJoftCD`W3QG`)L`$8N*CYe&;sLGD(ag((Kf z6G^3wW^NF)Dr?KRL}4s$s}B!lIoAqpXa|+M*PM49fSfP2)k|b6u&ovEI-tk3!MUg& z#oo+vgR3IF$Y0i05~xRETD%s#Xs;QUpDt$g-anP4SU_=4f;F>pU=3NYgLmgF`uliGWu)PIUc<}OKmxM zOK$uV#Fw-Whd!z!Os?-Sb7Z^lxiIpaKpVz%CKs1Xm8*BmQJC`(GZVuw6KHhoiHS^l z(6SNpWbVdHlj}IL8deO;un5c0f~&dA8S|qhQo%&eHnKICnEq`Q_7AsXLzqnlYgWu7 z8KId`XyM(wcXj<4ip3W{+khB#SPQx~;7Nw&TU*hmt%2NPgXI~pfkOGd?k-Lig6{@r)*FZC)vs^TYuTk8s6OUq1c;(>BjzrsCfu9vEGJ(W=c_k7OhtRK3X;@a{9+G0vHMh~$F z9V_PXr?j`{D&`!lXR-7+Q?CA9L9+uq%Ql>vk(X0or^I=kFu`y4^5*^ZVK>Z8bL6xY7x@69c$%z8@ z+)TKH6w{1w+A(u2WG%%`2CXGm23Kz_3-hx)x>%???VuE#wiM&!>^^(vH)E=ATb7rw+^Ut=z|;OqvygRF)I=#Mx@RL1C@R z*L223^sYfow_4@KMTs)vpVf@rigmBtm@GJU(8@gQW3Se}=Fi;t0Pgn(ub_# zzIu%|5|SGk@@x;i4lOIsBlDSOPeuC8*;hs%-0iYE3i+{+oRJFdg)y68P7xOS=f^X- zS$w&%VPPtnIfus%rRl#q-WzSJ4sIdNu2o)H?9>}Ezv9?};O;DCf;*LA*Hq5_M6llo 
zWQY5;Vg2M3`<(Xirk$JPmkWuJHb*&zf5}&w(-31h*Gqd|7&3R#xR-VbXR1xEv@eUX zDOU#B_Q7WRqW#F;^NM$ zT3lRcVYrkdD}tdtYi;^4nvy&?c=lY*!locW{Kg{#i&yBBy2;XvHk2b>f{V+|xN{=k z?y(n>(%i94B+~?YFJmP1UKLAwahHQzT>B2QYssB1_I@h6-^@MCD~?4m^LFJS>|AF# zqo2fRr*(`Iw7Oy|%O5G<(}dY2p-?{Fxp}y&gYNo_l}+PegItv}Ld*mC{R3$_3i_4f zY28yNT!}E&lo0qO#WW;yHL)gF9)f2$!YGm}Po<)mFc@*1WRPz*XoG{NMT$%E+KfGd zv5$f_%#UWA??r<9@8o$Ux*blBO4f9&_jhrTn2RTlxHri!zGN-a)j58dLT~u2wpv53!r-Ugsn5Oi?Q8!$wHMe2_1;9< zL!gA5$D-AyK2eM0n+&eSaZL9+LB^g*=-XuWD-A7WyCl@(WhZkh60m12adn zBNxPi5k&YmQsl1YtXCHuzukE^RzKta)_d987I}QrPj7eL`HuHDZ=b*a=Jz8lw>y_x zQS$;?{SnU{k*?dFAATM8BXxg3{i~>3hx-QB*?D@V1FP;`I7!lt6PO8bv`1}I+tm~5 z1@$Dpr>3s$!WRI~z;B#{zc{U~!Vm4iy3;9exEa1Ej_=i4)P8jhFdy*&`R*IIXV*>m ze*K2%R__+-4|><@yPL1$>G1lG>wbdwA4LX%%e}~!=#%*Wd2|%N_q|_4wnxuye#`p| zTKvQRJ?e(h=hVh`fc@JjoxuHFZ?E6y9gglp-S<#_Wo!B-=DR%Qe}Z1Oyam)@j(2|O zU0&-MMmPGA<|T~s0dD^P`R40O_al#DwDrK|iKV~dsR#Fh_jh3QGM@UqFX49p_giTH zOVn<~?Js@eT||!t{{~8p-dFMN*ZyPvyIW@(0B!j+d3yI`bWhzoeqa4W^JVl0HI~~@ zOYUFy&Z1__zv_Prwe=hCd)L-{t|>r%$@|9U<+>XUt>EO+x+F&45Z!{2|APCD_gdt$ zX!9#P_xoSQ?>&FUzwEue=}$}G#PT+`sX6;P-c^*%o~* z@}j>Vr86isq4fx!$NZBh{Tr6%UnsTvU)$M?H)%Y-x7M=+%-8E)!Dv4MwvXY?;Q4*8 zG2;8smAeZ|A4GnP+OIThUjH%vG1t2v)%Psj^XI(x8YWQM()=cRK$e$}<8DNaIazx3 Ezb943E&u=k literal 0 HcmV?d00001 diff --git a/sources/Avarice/Utf8Decoder.uc b/sources/Avarice/Utf8Decoder.uc new file mode 100644 index 0000000..89f677a --- /dev/null +++ b/sources/Avarice/Utf8Decoder.uc @@ -0,0 +1,260 @@ +/** + * Class for decoding UTF8 byte stream into Acedia's `MutableText` value. + * It is made to work with incoming, and possibly incomplete, streams of + * bytes: instead of consuming the whole utf8 text, it is made to consume it + * byte-by-byte and store `MutableText`s that it parsed from the stream + * (assumes that separate `MutableText`s are separated by `0` byte). + * This implementation should correctly convert any valid UTF8, but it is + * not guaranteed to reject any invalid UTF8. 
In particular, it accepts
+ * overlong code point encodings (except overlong encoding of zero).
+ * It, however, does check whether every byte has a correct bit prefix and
+ * does not attempt to repair input data if it finds invalid one.
+ * See [wiki page](https://en.wikipedia.org/wiki/UTF-8) for details.
+ * Copyright 2021 Anton Tarasenko
+ *------------------------------------------------------------------------------
+ * This file is part of Acedia.
+ *
+ * Acedia is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Acedia is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Acedia. If not, see <https://www.gnu.org/licenses/>.
+ */
+class Utf8Decoder extends AcediaObject;
+
+/**
+ * `Utf8Decoder` consumes byte by byte with `PushByte()` method and its
+ * algorithm is simple:
+ *      1. If it encounters a byte that encodes a singular code point by
+ *          itself (starts with `0` bit) - it is added as a codepoint;
+ *      2. If it encounters byte which indicates that next code point is
+ *          composed out of several bytes (starts with 110, 1110 or 11110) -
+ *          remembers that it has to read several "inner" bytes belonging to
+ *          the same code point and starts to expect them instead;
+ *      3. If it ever encounters a byte with unexpected (and thus invalid)
+ *          bit prefix - enters a failed state;
+ *      4. If it ever encounters a `0` byte:
+ *          * If it was not in a failed state - records `MutableText`
+ *              accumulated so far;
+ *          * Clears failed state.
+ */
+
+var private bool failedState;
+
+// Variables for building a multi-byte code point
+var private int nextCodePoint;
+var private int innerBytesLeft;
+
+// `MutableText` we are building right now
+var private MutableText nextText;
+// `MutableText`s we have already built
+var private array<MutableText> outputQueue;
+
+// These masks (`maskDropN`) allow to turn into zero first `N` bits in
+// the byte with `&` operator.
+var private byte maskDrop1, maskDrop2, maskDrop3, maskDrop4, maskDrop5;
+// These masks (`maskTakeN`) allow to turn into zero all but first `N` bits
+// in the byte with `&` operator.
+// `maskTakeN == ~maskDropN`.
+var private byte maskTake1, maskTake2, maskTake3, maskTake4, maskTake5;
+
+protected function Constructor()
+{
+    nextText = _.text.Empty();
+}
+
+protected function Finalizer()
+{
+    _.memory.Free(nextText);
+    _.memory.FreeMany(outputQueue);
+    nextText = none;
+    failedState = false;
+    outputQueue.length = 0;
+    innerBytesLeft = 0;
+    nextCodePoint = 0;
+}
+
+/**
+ * Checks whether data in the `MutableText` that caller `Utf8Decoder` is
+ * currently filling was detected to be invalid.
+ *
+ * This state can be reset by pushing `0` byte into caller `Utf8Decoder`.
+ * See `PushByte()` for more info.
+ *
+ * @return `true` iff caller `Utf8Decoder` is in a failed state.
+ */
+public final function bool Failed()
+{
+    return failedState;
+}
+
+/**
+ * Checks whether caller `Utf8Decoder` has any data put in
+ * the `MutableText` it is currently building.
+ * Result is guaranteed to be `false` after `self.PushByte(0)` call, since
+ * it starts a brand new `MutableText`.
+ *
+ * @return `true` iff there is either an unfinished multi-byte code point
+ *      or at least one character in the `MutableText` being built.
+ */
+public final function bool HasUnfinishedData()
+{
+    if (innerBytesLeft > 0) return true;
+    // `nextText` is `none` after `Finalizer()` was called
+    if (nextText != none && nextText.GetLength() > 0) return true;
+    return false;
+}
+
+/**
+ * Returns next `MutableText` that was successfully decoded by
+ * the caller `Utf8Decoder`, removing it from the output queue.
+ *
+ * @return Next `MutableText` in the caller `Utf8Decoder`'s output queue.
+ *      `none` iff output queue is empty. `MutableText`s are returned in order
+ *      they were decoded.
+ */
+public final function MutableText PopText()
+{
+    local MutableText oldestText;
+    // `oldestText` stays `none` when there is nothing to return
+    if (outputQueue.length > 0)
+    {
+        oldestText = outputQueue[0];
+        outputQueue.Remove(0, 1);
+    }
+    return oldestText;
+}
+
+/**
+ * Adds next `byte` from the byte stream that is supposed to encode UTF8 text.
+ * To finish building `MutableText` pass `0` byte into this method, which will
+ * put the `MutableText` built so far into an "output queue" (accessible with
+ * `PopText()`) and start building a new one.
+ *
+ * This method expects `byte`s, in order, from a sequence that has correct
+ * UTF8 encoding. If method detects incorrect UTF8 sequence - it will be put
+ * into a "failed state", discarding `MutableText` it was currently building,
+ * along with any further input (except `0` byte).
+ * Pushing `0` byte will restore `Utf8Decoder` from a failed state and it
+ * will start building a new `MutableText`.
+ *
+ * @param   nextByte    next byte from byte stream that is supposed to encode
+ *      UTF8 text. `0` will make caller `Utf8Decoder` start building new
+ *      `MutableText`.
+ * @return  `true` iff caller `Utf8Decoder` was not in a failed state and
+ *      operation was successful.
+ */
+public final function bool PushByte(byte nextByte)
+{
+    // Amount of "inner" bytes announced by a leading byte
+    local int announcedInnerBytes;
+    if (nextByte == 0) {
+        return QueueCurrentText();
+    }
+    if (failedState) {
+        return false;
+    }
+    if (innerBytesLeft > 0) {
+        return PushInnerByte(nextByte);
+    }
+    // 0xxxxxxx: the whole code point fits into this byte
+    if ((nextByte & maskTake1) == 0)
+    {
+        AppendCodePoint(nextByte);
+        return true;
+    }
+    // Otherwise this must be a leading byte of a multi-byte code point
+    if ((nextByte & maskTake3) == maskTake2)
+    {
+        // 110xxxxx: 2 bytes per code point
+        nextCodePoint = nextByte & maskDrop3;
+        announcedInnerBytes = 1;
+    }
+    else if ((nextByte & maskTake4) == maskTake3)
+    {
+        // 1110xxxx: 3 bytes per code point
+        nextCodePoint = nextByte & maskDrop4;
+        announcedInnerBytes = 2;
+    }
+    else if ((nextByte & maskTake5) == maskTake4)
+    {
+        // 11110xxx: 4 bytes per code point
+        nextCodePoint = nextByte & maskDrop5;
+        announcedInnerBytes = 3;
+    }
+    else
+    {
+        // `nextByte` must have one of the above forms
+        // (or 10xxxxxx that is handled in `PushInnerByte()`)
+        failedState = true;
+        return false;
+    }
+    innerBytesLeft = announcedInnerBytes;
+    return true;
+}
+
+// This method is responsible for pushing "inner" bytes: bytes that come
+// after the first one when code point is encoded with multiple bytes.
+// All of them are expected to have 10xxxxxx prefix.
+// Assumes `innerBytesLeft > 0` and `failedState == false`
+// to avoid needless checks.
+private final function bool PushInnerByte(byte nextByte)
+{
+    local bool hasInnerPrefix;
+    // Inner bytes must have the form of 10xxxxxx
+    hasInnerPrefix = ((nextByte & maskTake2) == maskTake1);
+    if (!hasInnerPrefix)
+    {
+        failedState = true;
+        return false;
+    }
+    // Every inner byte carries 6 bits of the code point: move what we have
+    // collected so far by 6 bits to make space for them
+    nextCodePoint = (nextCodePoint << 6) + (nextByte & maskDrop2);
+    innerBytesLeft -= 1;
+    if (innerBytesLeft > 0) {
+        return true;
+    }
+    // Code point is complete.
+    // Overlong encoding of `0` is forbidden (as it is by the Unicode
+    // standard)
+    if (nextCodePoint == 0)
+    {
+        failedState = true;
+        return false;
+    }
+    AppendCodePoint(nextCodePoint);
+    return true;
+}
+
+// Adds character with given code point to the text that is being built
+private final function AppendCodePoint(int codePoint)
+{
+    local Text.Character decodedCharacter;
+    decodedCharacter.codePoint = codePoint;
+    nextText.AppendCharacter(decodedCharacter);
+}
+
+// Finishes text that is being built and starts a new one.
+// Returns `true` if `MutableText` was added to the queue
+// (there were no encoding errors)
+private final function bool QueueCurrentText()
+{
+    local bool hadErrors;
+    // Unfinished multi-byte code point also means that UTF8 was invalid
+    hadErrors = failedState || innerBytesLeft > 0;
+    if (hadErrors) {
+        _.memory.Free(nextText);
+    }
+    else {
+        outputQueue[outputQueue.length] = nextText;
+    }
+    failedState = false;
+    innerBytesLeft = 0;
+    nextText = _.text.Empty();
+    return !hadErrors;
+}
+
+defaultproperties
+{
+    maskDrop1 = 127 // 0 1 1 1 1 1 1 1
+    maskDrop2 = 63 // 0 0 1 1 1 1 1 1
+    maskDrop3 = 31 // 0 0 0 1 1 1 1 1
+    maskDrop4 = 15 // 0 0 0 0 1 1 1 1
+    maskDrop5 = 7 // 0 0 0 0 0 1 1 1
+    maskTake1 = 128 // 1 0 0 0 0 0 0 0
+    maskTake2 = 192 // 1 1 0 0 0 0 0 0
+    maskTake3 = 224 // 1 1 1 0 0 0 0 0
+    maskTake4 = 240 // 1 1 1 1 0 0 0 0
+    maskTake5 = 248 // 1 1 1 1 1 0 0 0
+}
\ No newline at end of file
diff --git a/sources/Avarice/Utf8Encoder.uc b/sources/Avarice/Utf8Encoder.uc
new file mode 100644
index 0000000..e66321d
--- /dev/null
+++ b/sources/Avarice/Utf8Encoder.uc
@@ -0,0 +1,121 @@
+/**
+ * Class for encoding Acedia's `Text` value into UTF8 byte
+ * representation.
+ * See [wiki page](https://en.wikipedia.org/wiki/UTF-8) for details.
+ * Copyright 2021 Anton Tarasenko
+ *------------------------------------------------------------------------------
+ * This file is part of Acedia.
+ *
+ * Acedia is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Acedia is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Acedia. If not, see <https://www.gnu.org/licenses/>.
+ */
+class Utf8Encoder extends AcediaObject;
+
+// Limits on code point values that can be recorded with 1, 2, 3 and 4 bytes
+// respectively
+var private int utfLimit1, utfLimit2, utfLimit3, utfLimit4;
+
+// Bit prefixes for UTF8 encoding
+var private int utfMask2, utfMask3, utfMask4, utfMaskIn;
+// This integer will have only 6 last bits be 1s.
+// We need it to zero all but last 6 bits for `int`s (with `&` bit operator).
+var private int lastSixBits;
+
+/**
+ * Encodes passed `Text` object into UTF8 byte representation.
+ *
+ * In case passed `text` is somehow broken and contains invalid Unicode
+ * code points - this method will return empty array.
+ *
+ * @param   text    `Text` object to encode.
+ * @return  UTF8 representation of passed `text` as an array of `byte`s.
+ *      Empty array if `text == none` or `text` contains invalid Unicode
+ *      code points.
+ */
+public final function array<byte> Encode(Text text)
+{
+    local int           i, nextCodepoint, textLength;
+    local array<byte>   buffer;
+    if (__().text.IsEmpty(text)) {
+        return buffer; // empty array
+    }
+    textLength = text.GetLength();
+    for (i = 0; i < textLength; i += 1)
+    {
+        nextCodepoint = text.GetCharacter(i).codePoint;
+        if (nextCodepoint < 0 || nextCodepoint > utfLimit4)
+        {
+            // Outside of known Unicode range
+            // Should not be possible, since `Text` is expected to
+            // contain only correct Unicode.
+            // Negative values must be rejected explicitly: otherwise they
+            // would pass the `<= utfLimit1` check below and be recorded as
+            // a single truncated byte
+            buffer.length = 0;
+            break;
+        }
+        if (nextCodepoint <= utfLimit1) {
+            buffer[buffer.length] = nextCodepoint;
+        }
+        else if (nextCodepoint <= utfLimit2)
+        {
+            // Drop 6 bits that will be recorded inside second byte and
+            // add 2-byte sequence mask
+            buffer[buffer.length] = utfMask2 | (nextCodepoint >> 6);
+            // Take only last 6 bits for the second (last) byte
+            // + add inner-byte sequence mask
+            buffer[buffer.length] = utfMaskIn | (nextCodepoint & lastSixBits);
+        }
+        else if (nextCodepoint <= utfLimit3)
+        {
+            // Drop 12 bits that will be recorded inside second and third bytes
+            // and add 3-byte sequence mask
+            buffer[buffer.length] = utfMask3 | (nextCodepoint >> 12);
+            // Drop 6 bits that will be recorded inside third byte and
+            // add inner-byte sequence mask
+            buffer[buffer.length] =
+                utfMaskIn | ((nextCodepoint >> 6) & lastSixBits);
+            // Take only last 6 bits for the third (last) byte
+            // + add inner-byte sequence mask
+            buffer[buffer.length] = utfMaskIn | (nextCodepoint & lastSixBits);
+        }
+        else
+        {
+            // Drop 18 bits that will be recorded inside second, third and
+            // fourth bytes, then add 4-byte sequence mask
+            buffer[buffer.length] = utfMask4 | (nextCodepoint >> 18);
+            // Drop 12 bits that will be recorded inside third and fourth bytes
+            // and add inner-byte sequence mask
+            buffer[buffer.length] =
+                utfMaskIn | ((nextCodepoint >> 12) & lastSixBits);
+            // Drop 6 bits that will be recorded inside fourth byte
+            // and add inner-byte sequence mask
+            buffer[buffer.length] =
+                utfMaskIn | ((nextCodepoint >> 6) & lastSixBits);
+            // Take only last 6 bits for the fourth (last) byte
+            // + add inner-byte sequence mask
+            buffer[buffer.length] = utfMaskIn | (nextCodepoint & lastSixBits);
+        }
+    }
+    return buffer;
+}
+
+defaultproperties
+{
+    utfLimit1 = 127
+    utfLimit2 = 2047
+    utfLimit3 = 65535
+    utfLimit4 = 1114111
+    utfMask2 = 192 // 1 1 0 0 0 0 0 0
+    utfMask3 = 224 // 1 1 1 0 0 0 0 0
+    utfMask4 = 240 // 1 1 1 1 0 0 0 0
+    utfMaskIn = 128 // 1 0 0 0 0 0 0 0
+    lastSixBits = 63 // 0 0 1 1 1 1 1 1
+}
\ No newline at end of file
diff --git a/sources/Global.uc b/sources/Global.uc index bf1a4e4..e475a65 100644 --- a/sources/Global.uc +++ b/sources/Global.uc @@ -40,6 +40,7 @@ var public UserAPI users; var public PlayersAPI players; var public JSONAPI json; var public DBAPI db; +var public AvariceAPI avarice; var public KFFrontend kf; @@ -74,6 +75,7 @@ protected function Initialize() players = PlayersAPI(memory.Allocate(class'PlayersAPI')); json = JSONAPI(memory.Allocate(class'JSONAPI')); db = DBAPI(memory.Allocate(class'DBAPI')); + avarice = AvariceAPI(memory.Allocate(class'AvariceAPI')); kf = KFFrontend(memory.Allocate(class'KF1_Frontend')); json.StaticConstructor(); } \ No newline at end of file diff --git a/sources/Manifest.uc b/sources/Manifest.uc index f0353db..f0629aa 100644 --- a/sources/Manifest.uc +++ b/sources/Manifest.uc @@ -23,6 +23,7 @@ defaultproperties { features(0) = class'Commands' + features(1) = class'Avarice' commands(0) = class'ACommandHelp' commands(1) = class'ACommandDosh' commands(2) = class'ACommandNick' @@ -55,4 +56,5 @@ defaultproperties testCases(20) = class'TEST_CommandDataBuilder' testCases(21) = class'TEST_LogMessage' testCases(22) = class'TEST_LocalDatabase' + testCases(23) = class'TEST_UTF8EncoderDecoder' } \ No newline at end of file