Anton Tarasenko
3 years ago
10 changed files with 870 additions and 0 deletions
@ -0,0 +1,80 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Copyright 2021 Anton Tarasenko |
||||||
|
*------------------------------------------------------------------------------ |
||||||
|
* This file is part of Acedia. |
||||||
|
* |
||||||
|
* Acedia is free software: you can redistribute it and/or modify |
||||||
|
* it under the terms of the GNU General Public License as published by |
||||||
|
* the Free Software Foundation, version 3 of the License, or |
||||||
|
* (at your option) any later version. |
||||||
|
* |
||||||
|
* Acedia is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
* GNU General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU General Public License |
||||||
|
* along with Acedia. If not, see <https://www.gnu.org/licenses/>. |
||||||
|
*/ |
||||||
|
class Avarice extends Feature |
||||||
|
config(AcediaAvarice); |
||||||
|
|
||||||
|
// Describes a single configured Avarice link.
struct AvariceLink
{
    // Name of the link; passed to `AvariceTCPLink.Connect()` as its name
    var string name;
    // Address of the link; `OnEnabled()` parses it as "host:port"
    var string host;
};
||||||
|
|
||||||
|
var private config array<AvariceLink> link; |
||||||
|
|
||||||
|
var private LoggerAPI.Definition errorBadAddress; |
||||||
|
|
||||||
|
/**
 * Walks over every configured `link` entry, parses its address
 * ("host:port") and starts an `AvariceTCPLink` for it.
 *
 * Entries whose address cannot be parsed are reported through
 * `errorBadAddress` and skipped.
 */
protected function OnEnabled()
{
    local int               i;
    local string            host;
    local int               port;
    local AvariceTCPLink    nextTCPLink;
    for (i = 0; i < link.length; i += 1)
    {
        if (!ParseAddress(link[i].host, host, port))
        {
            // The message talks about the address, so report the address
            // that failed to parse rather than the link's name
            _.logger.Auto(errorBadAddress)
                .Arg(_.text.FromString(link[i].host));
            // `host` / `port` hold stale or partial values at this point -
            // do not try to connect with them
            continue;
        }
        nextTCPLink = AvariceTCPLink(_.memory.Allocate(class'AvariceTCPLink'));
        if (nextTCPLink != none) {
            nextTCPLink.Connect(link[i].name, host, port);
        }
    }
}
||||||
|
|
||||||
|
/**
 * Destroys every `AvariceTCPLink` actor found among the dynamic actors of
 * the current level.
 */
protected function OnDisabled()
{
    local LevelInfo         currentLevel;
    local AvariceTCPLink    activeLink;
    currentLevel = _.unreal.GetLevel();
    foreach currentLevel.DynamicActors(class'AvariceTCPLink', activeLink)
    {
        activeLink.Destroy();
    }
}
||||||
|
|
||||||
|
/**
 * Splits `address` of the form "host:port" into its two components.
 * Whitespace around the address is skipped.
 *
 * @param   address String to parse.
 * @param   host    Receives everything before the first ':' character.
 * @param   port    Receives the unsigned integer after the ':' character.
 * @return  `true` iff parsing succeeded and nothing was left unparsed.
 */
private final function bool ParseAddress(
    string      address,
    out string  host,
    out int     port)
{
    local bool      parsedEverything;
    local Parser    addressParser;
    addressParser = _.text.ParseString(address);
    // NOTE(review): relies on `Parser`'s methods returning the parser itself,
    // which the original chained call form already assumed
    addressParser.Skip();
    addressParser.MUntilS(host, _.text.GetCharacter(":"));
    addressParser.MatchS(":");
    addressParser.MUnsignedInteger(port);
    addressParser.Skip();
    parsedEverything = false;
    if (addressParser.Ok()) {
        parsedEverything = (addressParser.GetRemainingLength() == 0);
    }
    addressParser.FreeSelf();
    return parsedEverything;
}
||||||
|
|
||||||
|
defaultproperties
{
    // Logged by `OnEnabled()` when `ParseAddress()` rejects a link's address
    errorBadAddress = (l=LOG_Error,m="Cannot parse address \"%1\"")
}
@ -0,0 +1,54 @@ |
|||||||
|
/** |
||||||
|
* Copyright 2020 - 2021 Anton Tarasenko |
||||||
|
*------------------------------------------------------------------------------ |
||||||
|
* This file is part of Acedia. |
||||||
|
* |
||||||
|
* Acedia is free software: you can redistribute it and/or modify |
||||||
|
* it under the terms of the GNU General Public License as published by |
||||||
|
* the Free Software Foundation, version 3 of the License, or |
||||||
|
* (at your option) any later version. |
||||||
|
* |
||||||
|
* Acedia is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
* GNU General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU General Public License |
||||||
|
* along with Acedia. If not, see <https://www.gnu.org/licenses/>. |
||||||
|
*/ |
||||||
|
class AvariceAPI extends AcediaObject; |
||||||
|
|
||||||
|
/**
 * Builds an `AvariceMessage` out of its JSON text representation.
 *
 * The text is parsed as a JSON object that must contain the keys "i" and "g";
 * they are used as the message's ID and group. The item under key "p"
 * (if any) is moved into the message's `data`.
 *
 * @param   message Text with the JSON object to convert.
 * @return  New `AvariceMessage`; `none` if `message` is `none` or the parsed
 *      object does not have the necessary keys.
 */
public final function AvariceMessage MessageFromText(Text message)
{
    local Parser            jsonParser;
    local AvariceMessage    newMessage;
    local AssociativeArray  jsonObject;
    if (message == none) {
        return none;
    }
    jsonParser = _.text.Parse(message);
    jsonObject = _.json.ParseObjectWith(jsonParser);
    jsonParser.FreeSelf();
    // `newMessage` stays `none` unless all required keys are present
    if (HasNecessaryMessageKeys(jsonObject))
    {
        newMessage = AvariceMessage(_.memory.Allocate(class'AvariceMessage'));
        newMessage.SetID(jsonObject.GetText(P("i")));
        newMessage.SetGroup(jsonObject.GetText(P("g")));
        newMessage.data = jsonObject.TakeItem(P("p"));
    }
    _.memory.Free(jsonObject);
    return newMessage;
}
||||||
|
|
||||||
|
/**
 * Checks that `message` exists and has both the "i" and the "g" keys.
 * Only the presence of the keys is checked, not the stored values.
 */
private final function bool HasNecessaryMessageKeys(AssociativeArray message)
{
    return (    message != none
            &&  message.HasKey(P("i"))
            &&  message.HasKey(P("g")));
}
||||||
|
|
||||||
|
defaultproperties |
||||||
|
{ |
||||||
|
} |
@ -0,0 +1,88 @@ |
|||||||
|
class AvariceClient extends AcediaObject; |
||||||
|
|
||||||
|
// States of the byte-by-byte reader implemented in `PushByte()`.
enum AvariceClientState
{
    // Next non-zero byte is taken as the high byte of the message ID
    ACS_Waiting,
    // Next non-zero byte is added as the low byte of the message ID
    ACS_ReadingID,
    // Reading two bytes of the message length (high byte first)
    ACS_ReadingLength,
    // Collecting payload bytes into `currentPayload`
    ACS_ReadingPayload,
    // Non-zero bytes are ignored; a `0` byte returns reader to `ACS_Waiting`
    ACS_Invalid
};
||||||
|
|
||||||
|
var private int currentID; |
||||||
|
var private int currentMessageLength; |
||||||
|
var private array<byte> currentPayload; |
||||||
|
|
||||||
|
var private AvariceClientState currentState; |
||||||
|
var private int bytesLeftToRead; |
||||||
|
var private byte buffer[255]; |
||||||
|
var private array<byte> longBuffer; |
||||||
|
var private int pendingBytes; |
||||||
|
|
||||||
|
/**
 * Feeds the next byte of the incoming stream into the reader's state machine.
 *
 * A `0` byte always returns the reader to `ACS_Waiting` and resets
 * the buffer. Any other byte is interpreted according to `currentState`:
 * two bytes of ID, two bytes of length, then `currentMessageLength` bytes of
 * payload.
 *
 * NOTE(review): a message with zero length leaves `bytesLeftToRead == 0`
 * on entering `ACS_ReadingPayload`, so the `bytesLeftToRead == 0` check below
 * is never hit for it - confirm whether that is intended.
 * NOTE(review): `currentPayload` is not cleared anywhere in this method -
 * confirm where it is supposed to be reset.
 *
 * @param   nextByte    Next byte from the stream.
 */
public final function PushByte(byte nextByte)
{
    if (nextByte == 0)
    {
        // TODO: when `bytesLeftToRead > 0` - ACK for short message (with id)
        currentState = ACS_Waiting;
        ResetBuffer();
        return;
    }
    switch (currentState)
    {
    case ACS_Invalid:
        // TODO: ACK of invalid message's end
        return;
    case ACS_Waiting:
        // High byte of the ID
        currentID       = int(nextByte) << 8;
        currentState    = ACS_ReadingID;
        break;
    case ACS_ReadingID:
        // Low byte of the ID
        currentID       += nextByte;
        currentState    = ACS_ReadingLength;
        bytesLeftToRead = 2;
        break;
    case ACS_ReadingLength:
        bytesLeftToRead -= 1;
        if (bytesLeftToRead > 0)
        {
            // High byte of the length
            currentMessageLength = int(nextByte) << 8;
        }
        else
        {
            // Low byte of the length; payload follows
            currentMessageLength    += nextByte;
            currentState            = ACS_ReadingPayload;
            bytesLeftToRead         = currentMessageLength;
        }
        break;
    case ACS_ReadingPayload:
        currentPayload[currentPayload.length] = nextByte;
        // TODO: Decode payload into `AvariceMessage`
        // TODO: Send messages via Acedia's signals
        bytesLeftToRead -= 1;
        if (bytesLeftToRead == 0)
        {
            currentState = ACS_Waiting;
            // TODO: ACK into buffer
        }
        break;
    }
}
||||||
|
|
||||||
|
// Empties `longBuffer` and zeroes the `pendingBytes` counter.
private final function ResetBuffer()
{
    longBuffer.length   = 0;
    pendingBytes        = 0;
}
||||||
|
|
||||||
|
defaultproperties |
||||||
|
{ |
||||||
|
} |
@ -0,0 +1,93 @@ |
|||||||
|
class AvariceMessage extends AcediaObject; |
||||||
|
|
||||||
|
var private Text messageID; |
||||||
|
var private Text messageGroup; |
||||||
|
|
||||||
|
var public AcediaObject data; |
||||||
|
|
||||||
|
var private AssociativeArray messageTemplate; |
||||||
|
|
||||||
|
/**
 * Creates the shared `default.messageTemplate` collection and fills it with
 * the "i", "g" and "p" keys (all set to `none`).
 * Does nothing if `StaticConstructorGuard()` returns `true`.
 */
public static function StaticConstructor()
{
    if (StaticConstructorGuard()) {
        return;
    }
    super.StaticConstructor();
    default.messageTemplate = __().collections.EmptyAssociativeArray();
    ResetTemplate(default.messageTemplate);
}
||||||
|
|
||||||
|
/**
 * Frees the ID, group and `data` objects held by this message and forgets
 * the references to them.
 */
protected function Finalizer()
{
    __().memory.Free(messageID);
    messageID = none;
    __().memory.Free(messageGroup);
    messageGroup = none;
    __().memory.Free(data);
    data = none;
}
||||||
|
|
||||||
|
// Sets items under keys "i", "g" and "p" of `template` to `none`.
// Does nothing for `none` template.
private static final function ResetTemplate(AssociativeArray template)
{
    if (template != none)
    {
        template.SetItem(P("i"), none);
        template.SetItem(P("g"), none);
        template.SetItem(P("p"), none);
    }
}
||||||
|
|
||||||
|
/**
 * Replaces the message ID with a copy of `id`.
 * Previously stored ID is freed.
 *
 * @param   id  New ID; it is copied, not stored. `none` clears the ID.
 */
public final function SetID(Text id)
{
    _.memory.Free(messageID);
    if (id == none)
    {
        messageID = none;
        return;
    }
    messageID = id.Copy();
}
||||||
|
|
||||||
|
/**
 * Returns a copy of the message ID.
 *
 * @return  New copy of the stored ID; `none` if no ID is stored.
 */
public final function Text GetID()
{
    if (messageID == none) {
        return none;
    }
    return messageID.Copy();
}
||||||
|
|
||||||
|
/**
 * Replaces the message group with a copy of `group`.
 * Previously stored group is freed.
 *
 * @param   group   New group; it is copied, not stored.
 *      `none` clears the group.
 */
public final function SetGroup(Text group)
{
    _.memory.Free(messageGroup);
    if (group == none)
    {
        messageGroup = none;
        return;
    }
    messageGroup = group.Copy();
}
||||||
|
|
||||||
|
/**
 * Returns a copy of the message group.
 *
 * @return  New copy of the stored group; `none` if no group is stored.
 */
public final function Text GetGroup()
{
    if (messageGroup == none) {
        return none;
    }
    return messageGroup.Copy();
}
||||||
|
|
||||||
|
/**
 * Prints this message as JSON text with keys "i" (ID), "g" (group) and
 * "p" (`data`).
 *
 * The shared `default.messageTemplate` is filled with this message's values,
 * printed with `_.json.Print()` and then reset, so it keeps no references
 * to them.
 *
 * @return  JSON text for this message; `none` if either the ID or the group
 *      is missing.
 */
public final function MutableText ToText()
{
    local MutableText       printedMessage;
    local AssociativeArray  sharedTemplate;
    if (messageID == none || messageGroup == none) {
        return none;
    }
    sharedTemplate = default.messageTemplate;
    sharedTemplate.SetItem(P("i"), messageID);
    sharedTemplate.SetItem(P("g"), messageGroup);
    // Without `data` the "p" key keeps the `none` left by `ResetTemplate()`
    if (data != none) {
        sharedTemplate.SetItem(P("p"), data);
    }
    printedMessage = _.json.Print(sharedTemplate);
    ResetTemplate(sharedTemplate);
    return printedMessage;
}
||||||
|
|
||||||
|
defaultproperties |
||||||
|
{ |
||||||
|
} |
@ -0,0 +1,170 @@ |
|||||||
|
class AvariceTcpLink extends TcpLink |
||||||
|
dependson(LoggerAPI); |
||||||
|
|
||||||
|
var private Global _; |
||||||
|
|
||||||
|
var private string linkName; |
||||||
|
var private string linkHost; |
||||||
|
var private int linkPort; |
||||||
|
var private IpAddr remoteAddress; |
||||||
|
var private int ttt; |
||||||
|
|
||||||
|
var private bool didWorkLastTick; |
||||||
|
|
||||||
|
var private array<byte> buffer; |
||||||
|
|
||||||
|
var private Utf8Encoder encoder; |
||||||
|
var private Utf8Decoder decoder; |
||||||
|
|
||||||
|
var private LoggerAPI.Definition infoSuccess; |
||||||
|
var private LoggerAPI.Definition fatalBadPort; |
||||||
|
var private LoggerAPI.Definition fatalCannotBindPort; |
||||||
|
var private LoggerAPI.Definition fatalCannotResolveHost; |
||||||
|
var private LoggerAPI.Definition fatalCannotConnect; |
||||||
|
|
||||||
|
/**
 * Prepares this link (modes, encoder / decoder, remembered address) and
 * starts connecting it to the given remote address.
 *
 * If `host` can be converted into an IP address directly, the connection is
 * opened right away; otherwise the host name is resolved first and
 * the connection is opened from the `Resolved()` event.
 *
 * @param   name    Name of this link, used in log messages.
 * @param   host    Host name or IP address to connect to.
 * @param   port    Remote port; must be inside [1; 65535].
 * @return  `false` if `port` is invalid or a local port could not be bound,
 *      `true` otherwise. `true` only means that connecting was started.
 */
public final function bool Connect(string name, string host, int port)
{
    // Apparently `TcpLink` ignores default values for these variables,
    // so we set them here
    linkMode    = MODE_Binary;
    receiveMode = RMODE_Manual;
    _ = class'Global'.static.GetInstance();
    encoder = Utf8Encoder(_.memory.Allocate(class'Utf8Encoder'));
    decoder = Utf8Decoder(_.memory.Allocate(class'Utf8Decoder'));
    linkName = name;
    linkHost = host;
    linkPort = port;
    // Valid TCP ports are 16-bit and non-zero
    if (port <= 0 || port > 65535)
    {
        _.logger.Auto(fatalBadPort)
            .ArgInt(port)
            .Arg(_.text.FromString(linkName));
        return false;
    }
    if (BindPort(, true) <= 0)
    {
        // `fatalCannotBindPort` has a single placeholder: the link's name.
        // (Passing the remote port first made it print instead of the name.)
        _.logger.Auto(fatalCannotBindPort).Arg(_.text.FromString(linkName));
        return false;
    }
    StringToIpAddr(host, remoteAddress);
    remoteAddress.port = port;
    // Zero address means that `host` was not a plain IP address
    if (remoteAddress.addr == 0) {
        Resolve(host);
    }
    else {
        OpenAddress();
    }
    return true;
}
||||||
|
|
||||||
|
// Called once the host name passed to `Resolve()` was resolved:
// remembers the resolved address (port was already set by `Connect()`)
// and opens the connection.
event Resolved(IpAddr resolvedAddress)
{
    remoteAddress.addr = resolvedAddress.addr;
    OpenAddress();
}
||||||
|
|
||||||
|
/**
 * Attempts to open a connection to `remoteAddress` and logs the outcome.
 *
 * @return  `true` iff `OpenNoSteam()` reported success.
 */
private final function bool OpenAddress()
{
    if (!OpenNoSteam(remoteAddress))
    {
        _.logger.Auto(fatalCannotConnect).Arg(_.text.FromString(linkName));
        // Do not fall through into the success message
        return false;
    }
    _.logger.Auto(infoSuccess).Arg(_.text.FromString(linkName));
    return true;
}
||||||
|
|
||||||
|
// Called when the host name passed to `Resolve()` could not be resolved.
event ResolveFailed()
{
    // `fatalCannotResolveHost` expects two arguments:
    // host (%1) and link name (%2)
    _.logger.Auto(fatalCannotResolveHost)
        .Arg(_.text.FromString(linkHost))
        .Arg(_.text.FromString(linkName));
    // !Shut down!
}
||||||
|
|
||||||
|
/**
 * Reads all pending bytes from the connection, feeds them into `decoder` and
 * then answers every fully decoded message by parsing it into
 * `AvariceMessage`, printing it back into text and sending the result
 * (followed by a `0` byte) to the other side.
 *
 * A tick in which any data was read is followed by one idle tick
 * (`didWorkLastTick`).
 *
 * Decoded texts that cannot be converted into a message and back are dropped
 * without sending anything.
 *
 * NOTE(review): `ttt` is never reset, so after 4095 received bytes "FLUSH"
 * is sent on every tick that has pending data - confirm the intended
 * protocol.
 */
event Tick(float delta)
{
    local int               i, dataRead, totalRead;
    local byte              data[255];
    local array<byte>       toSend;
    local Text              flushCommand;
    local MutableText       nextMessage;
    local AvariceMessage    nextAMessage;
    if (didWorkLastTick)
    {
        didWorkLastTick = false;
        return;
    }
    if (!IsDataPending()) {
        return;
    }
    // Drain everything that is available right now
    dataRead = ReadBinary(255, data);
    while (dataRead > 0)
    {
        for (i = 0; i < dataRead; i += 1) {
            decoder.PushByte(data[i]);
        }
        ttt         += dataRead;
        totalRead   += dataRead;
        dataRead = ReadBinary(255, data);
    }
    if (ttt >= 4095)
    {
        flushCommand = _.text.FromString("FLUSH");
        toSend = encoder.Encode(flushCommand);
        _.memory.Free(flushCommand);
        toSend[toSend.length] = 0;
        SendByteArray(toSend);
    }
    // `dataRead` is always `<= 0` once the read loop is over, so the amount
    // of work done has to be judged by the total instead
    if (totalRead > 0) {
        didWorkLastTick = true;
    }
    // Obtain!
    nextMessage = decoder.PopText();
    while (nextMessage != none)
    {
        Log("SIZE:" @ nextMessage.GetLength() @ ttt);
        StopWatch(false);
        nextAMessage = _.avarice.MessageFromText(nextMessage);
        nextMessage.FreeSelf();
        nextMessage = none;
        // `MessageFromText()` returns `none` for texts that are not
        // valid messages
        if (nextAMessage != none)
        {
            nextMessage = nextAMessage.ToText();
            _.memory.Free(nextAMessage);
            nextAMessage = none;
        }
        // `ToText()` returns `none` for messages without ID or group
        if (nextMessage != none)
        {
            toSend = encoder.Encode(nextMessage);
            nextMessage.FreeSelf();
            toSend[toSend.length] = 0;
            SendByteArray(toSend);
        }
        nextMessage = decoder.PopText();
        StopWatch(true);
    }
}

// Sends `bytes` through this link, splitting them into chunks of at most
// 255 bytes (the most `SendBinary()` accepts at once).
private final function SendByteArray(array<byte> bytes)
{
    local int   i, chunkSize;
    local byte  chunk[255];
    for (i = 0; i < bytes.length; i += 1)
    {
        chunk[chunkSize] = bytes[i];
        chunkSize += 1;
        if (chunkSize >= 255)
        {
            SendBinary(255, chunk);
            chunkSize = 0;
        }
    }
    if (chunkSize > 0) {
        SendBinary(chunkSize, chunk);
    }
}
||||||
|
|
||||||
|
// Called when the connection was opened.
// NOTE(review): the log line below looks like a debugging leftover.
event Opened()
{
    Log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!");
}
||||||
|
|
||||||
|
// Called when the connection was closed. Currently does nothing.
event Closed()
{
}
||||||
|
|
||||||
|
defaultproperties
{
    // Logged by `OpenAddress()`; %1 - link name
    infoSuccess             = (l=LOG_Info,m="Successfully started Avarice link \"%1\"")
    // Logged by `Connect()`; %1 - port, %2 - link name
    fatalBadPort            = (l=LOG_Fatal,m="Bad port \"%1\" specified for Avarice link \"%2\"")
    // Logged by `Connect()`; %1 - link name
    fatalCannotBindPort     = (l=LOG_Fatal,m="Cannot bind port for Avarice link \"%1\"")
    // Logged by `ResolveFailed()`; %1 - host, %2 - link name
    fatalCannotResolveHost  = (l=LOG_Fatal,m="Cannot resolve host \"%1\" for Avarice link \"%2\"")
    // Logged by `OpenAddress()`; %1 - link name
    fatalCannotConnect      = (l=LOG_Fatal,m="Connection for Avarice link \"%1\" was rejected")
}
Binary file not shown.
@ -0,0 +1,260 @@ |
|||||||
|
/** |
||||||
|
* Class for decoding UTF8 byte stream into Acedia's `MutableText` value. |
||||||
|
* It is made to work with incoming, and possibly incomplete, streams of |
||||||
|
* bytes: instead of consuming the whole utf8 text, it is made to consume it |
||||||
|
* byte-by-byte and store `MutableText`s that it parsed from the stream |
||||||
|
* (assumes that separate `MutableText`s are separated by `0` byte). |
||||||
|
* This implementation should correctly convert any valid UTF8, but it is |
||||||
|
* not guaranteed to reject any invalid UTF8. In particular, it accepts |
||||||
|
* overlong code point encodings (except overlong encoding of zero). |
||||||
|
* It, however, does check whether every byte has a correct bit prefix and |
||||||
|
* does not attempt to repair input data if it finds invalid one. |
||||||
|
* See [wiki page](https://en.wikipedia.org/wiki/UTF-8) for details. |
||||||
|
* Copyright 2021 Anton Tarasenko |
||||||
|
*------------------------------------------------------------------------------ |
||||||
|
* This file is part of Acedia. |
||||||
|
* |
||||||
|
* Acedia is free software: you can redistribute it and/or modify |
||||||
|
* it under the terms of the GNU General Public License as published by |
||||||
|
* the Free Software Foundation, version 3 of the License, or |
||||||
|
* (at your option) any later version. |
||||||
|
* |
||||||
|
* Acedia is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
* GNU General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU General Public License |
||||||
|
* along with Acedia. If not, see <https://www.gnu.org/licenses/>. |
||||||
|
*/ |
||||||
|
class Utf8Decoder extends AcediaObject; |
||||||
|
|
||||||
|
/** |
||||||
|
 *      `Utf8Decoder` consumes byte by byte with `PushByte()` method and its
||||||
|
* algorithm is simple: |
||||||
|
* 1. If it encounters a byte that encodes a singular code point by |
||||||
|
* itself (starts with `0` bit) - it is added as a codepoint; |
||||||
|
* 2. If it encounters byte which indicates that next code point is |
||||||
|
* composed out of several bytes (starts with 110, 1110 or 11110) - |
||||||
|
* remembers that it has to read several "inner" bytes belonging to |
||||||
|
* the same code point and starts to expect them instead; |
||||||
|
* 3. If it ever encounters a byte with unexpected (and thus invalid) |
||||||
|
* bit prefix - enters a failed state; |
||||||
|
* 4. If it ever encounters a `0` byte: |
||||||
|
* * If it was not in a failed state - records `MutableText` |
||||||
|
* accumulated so far; |
||||||
|
* * Clears failed state. |
||||||
|
*/ |
||||||
|
|
||||||
|
var private bool failedState; |
||||||
|
|
||||||
|
// Variables for building a multi-byte code point |
||||||
|
var private int nextCodePoint; |
||||||
|
var private int innerBytesLeft; |
||||||
|
|
||||||
|
// `MutableText` we are building right now |
||||||
|
var private MutableText nextText; |
||||||
|
// `MutableText`s we have already built |
||||||
|
var private array<MutableText> outputQueue; |
||||||
|
|
||||||
|
// These masks (`maskDropN`) allow to turn into zero first `N` bits in |
||||||
|
// the byte with `&` operator. |
||||||
|
var private byte maskDrop1, maskDrop2, maskDrop3, maskDrop4, maskDrop5; |
||||||
|
// These masks (`maskTakeN`) allow to turn into zero all but first `N` bits |
||||||
|
// in the byte with `&` operator. |
||||||
|
// `maskTakeN == ~maskDropN`. |
||||||
|
var private byte maskTake1, maskTake2, maskTake3, maskTake4, maskTake5; |
||||||
|
|
||||||
|
// Starts with an empty `MutableText` to decode code points into.
protected function Constructor()
{
    nextText = _.text.Empty();
}
||||||
|
|
||||||
|
// Frees the text being built and all queued texts, then returns every
// variable of the decoder to its initial value.
protected function Finalizer()
{
    // Release owned objects
    _.memory.Free(nextText);
    nextText = none;
    _.memory.FreeMany(outputQueue);
    outputQueue.length = 0;
    // Reset decoding state
    failedState     = false;
    nextCodePoint   = 0;
    innerBytesLeft  = 0;
}
||||||
|
|
||||||
|
/** |
||||||
|
* Checks whether data in the `MutableText` that caller `Utf8Decoder` is |
||||||
|
* currently filling was detected to be invalid. |
||||||
|
* |
||||||
|
* This state can be reset by pushing `0` byte into caller `Utf8Decoder`. |
||||||
|
* See `PushByte()` for more info. |
||||||
|
* |
||||||
|
 * @return `true` iff caller `Utf8Decoder` is in a failed state.
||||||
|
*/ |
||||||
|
public final function bool Failed()
{
    // `true` here means that the decoder IS in a failed state
    return failedState;
}
||||||
|
|
||||||
|
/** |
||||||
|
* Checks whether caller `Utf8Decoder` has any data put in |
||||||
|
* the `MutableText` it is currently building. |
||||||
|
* Result is guaranteed to be `false` after `self.PushByte(0)` call, since |
||||||
|
* it starts a brand new `MutableText`. |
||||||
|
*/ |
||||||
|
public final function bool HasUnfinishedData()
{
    // Either a multi-byte code point is only partially read or some
    // code points were already appended to the current text
    return (innerBytesLeft > 0 || nextText.GetLength() > 0);
}
||||||
|
|
||||||
|
/** |
||||||
|
* Returns next `MutableText` that was successfully decoded by |
||||||
|
* the caller `Utf8Decoder`, removing it from the output queue. |
||||||
|
* |
||||||
|
* @return Next `MutableText` in the caller `Utf8Decoder`'s output queue. |
||||||
|
* `none` iff output queue is empty. `MutableText`s are returned in order |
||||||
|
* they were decoded. |
||||||
|
*/ |
||||||
|
public final function MutableText PopText()
{
    local MutableText oldestText;
    // `oldestText` stays `none` for an empty queue
    if (outputQueue.length > 0)
    {
        oldestText = outputQueue[0];
        outputQueue.Remove(0, 1);
    }
    return oldestText;
}
||||||
|
|
||||||
|
/** |
||||||
|
* Adds next `byte` from the byte stream that is supposed to encode UTF8 text. |
||||||
|
* To finish building `MutableText` pass `0` byte into this method, which will |
||||||
|
 *  put the `MutableText` built so far into an "output queue" (accessible with
||||||
|
* `PopText()`) and start building a new one. |
||||||
|
* |
||||||
|
* This method expects `byte`s, in order, from a sequence that has correct |
||||||
|
* UTF8 encoding. If method detects incorrect UTF8 sequence - it will be put |
||||||
|
* into a "failed state", discarding `MutableText` it was currently building, |
||||||
|
* along with any further input (except `0` byte). |
||||||
|
* Pushing `0` byte will restore `Utf8Decoder` from a failed state and it |
||||||
|
* will start building a new `MutableText`. |
||||||
|
* |
||||||
|
* @param nextByte next byte from byte stream that is supposed to encode |
||||||
|
* UTF8 text. `0` will make caller `Utf8Decoder` start building new |
||||||
|
* `MutableText`. |
||||||
|
* @return `true` iff caller `Utf8Decoder` was not in a failed state and |
||||||
|
* operation was successful. |
||||||
|
*/ |
||||||
|
public final function bool PushByte(byte nextByte)
{
    // `0` byte always finishes the current text, even in a failed state
    if (nextByte == 0) {
        return QueueCurrentText();
    }
    if (failedState) {
        return false;
    }
    if (innerBytesLeft > 0) {
        return PushInnerByte(nextByte);
    }
    // From here on `nextByte` must be the first byte of a code point and
    // `failedState` is known to be `false`
    if ((nextByte & maskTake1) == 0)
    {
        // 0xxxxxxx: 1 byte per code point
        AppendCodePoint(nextByte);
    }
    else if ((nextByte & maskTake3) == maskTake2)
    {
        // 110xxxxx: 2 bytes per code point
        nextCodePoint   = nextByte & maskDrop3;
        innerBytesLeft  = 1;
    }
    else if ((nextByte & maskTake4) == maskTake3)
    {
        // 1110xxxx: 3 bytes per code point
        nextCodePoint   = nextByte & maskDrop4;
        innerBytesLeft  = 2;
    }
    else if ((nextByte & maskTake5) == maskTake4)
    {
        // 11110xxx: 4 bytes per code point
        nextCodePoint   = nextByte & maskDrop5;
        innerBytesLeft  = 3;
    }
    else
    {
        // `nextByte` must have had one of the above forms
        // (10xxxxxx is only valid inside `PushInnerByte()`)
        failedState = true;
    }
    return !failedState;
}
||||||
|
|
||||||
|
// This method is responsible for pushing "inner" bytes: bytes that come |
||||||
|
// after the first one when code point is encoded with multiple bytes. |
||||||
|
// All of them are expected to have 10xxxxxx prefix. |
||||||
|
// Assumes `innerBytesLeft > 0` and `failedState == false` |
||||||
|
// to avoid needless checks. |
||||||
|
private final function bool PushInnerByte(byte nextByte)
{
    // Only bytes of the form 10xxxxxx are allowed here
    if ((nextByte & maskTake2) != maskTake1)
    {
        failedState = true;
        return false;
    }
    // Every inner byte carries 6 bits of the code point: make space for them
    // in the lower bits and put them there
    nextCodePoint   = (nextCodePoint << 6) | (nextByte & maskDrop2);
    innerBytesLeft  = innerBytesLeft - 1;
    if (innerBytesLeft > 0) {
        return true;
    }
    // Code point is complete.
    // We forbid overlong encoding of `0` (as does the Unicode standard)
    if (nextCodePoint == 0)
    {
        failedState = true;
        return false;
    }
    AppendCodePoint(nextCodePoint);
    return true;
}
||||||
|
|
||||||
|
// Appends a character with given `codePoint` to the text being built.
// Other fields of the character are left at their default values.
private final function AppendCodePoint(int codePoint)
{
    local Text.Character decodedCharacter;
    decodedCharacter.codePoint = codePoint;
    nextText.AppendCharacter(decodedCharacter);
}
||||||
|
|
||||||
|
// Return `true` if `MutableText` was added to the queue |
||||||
|
// (there were no encoding errors) |
||||||
|
private final function bool QueueCurrentText()
{
    local bool isValid;
    // Text is invalid if decoding has failed before or if we are still
    // waiting for inner bytes of an unfinished code point
    isValid = (!failedState && innerBytesLeft <= 0);
    if (isValid) {
        outputQueue[outputQueue.length] = nextText;
    }
    else {
        _.memory.Free(nextText);
    }
    // Start a brand new text with a clean state
    nextText        = _.text.Empty();
    innerBytesLeft  = 0;
    failedState     = false;
    return isValid;
}
||||||
|
|
||||||
|
defaultproperties
{
    // `maskDropN`: zeroes the first `N` bits of a byte
    maskDrop1 = 127 // 0 1 1 1 1 1 1 1
    maskDrop2 = 63  // 0 0 1 1 1 1 1 1
    maskDrop3 = 31  // 0 0 0 1 1 1 1 1
    maskDrop4 = 15  // 0 0 0 0 1 1 1 1
    maskDrop5 = 7   // 0 0 0 0 0 1 1 1
    // `maskTakeN`: keeps only the first `N` bits of a byte
    maskTake1 = 128 // 1 0 0 0 0 0 0 0
    maskTake2 = 192 // 1 1 0 0 0 0 0 0
    maskTake3 = 224 // 1 1 1 0 0 0 0 0
    maskTake4 = 240 // 1 1 1 1 0 0 0 0
    maskTake5 = 248 // 1 1 1 1 1 0 0 0
}
@ -0,0 +1,121 @@ |
|||||||
|
/** |
||||||
|
* Class for encoding Acedia's `MutableText` value into UTF8 byte |
||||||
|
* representation. |
||||||
|
* See [wiki page](https://en.wikipedia.org/wiki/UTF-8) for details. |
||||||
|
* Copyright 2021 Anton Tarasenko |
||||||
|
*------------------------------------------------------------------------------ |
||||||
|
* This file is part of Acedia. |
||||||
|
* |
||||||
|
* Acedia is free software: you can redistribute it and/or modify |
||||||
|
* it under the terms of the GNU General Public License as published by |
||||||
|
* the Free Software Foundation, version 3 of the License, or |
||||||
|
* (at your option) any later version. |
||||||
|
* |
||||||
|
* Acedia is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
|
* GNU General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU General Public License |
||||||
|
* along with Acedia. If not, see <https://www.gnu.org/licenses/>. |
||||||
|
*/ |
||||||
|
class Utf8Encoder extends AcediaObject; |
||||||
|
|
||||||
|
// Limits on code point values that can be recorded with 1, 2, 3 and 4 bytes |
||||||
|
// respectively |
||||||
|
var private int utfLimit1, utfLimit2, utfLimit3, utfLimit4; |
||||||
|
|
||||||
|
// Bit prefixes for UTF8 encoding |
||||||
|
var private int utfMask2, utfMask3, utfMask4, utfMaskIn; |
||||||
|
// This integer will have only 6 last bits be 1s. |
||||||
|
// We need it to zero all but last 6 bits for `int`s (with `&` bit operator). |
||||||
|
var private int lastSixBits; |
||||||
|
|
||||||
|
/** |
||||||
|
* Encodes passed `Text` object into UTF8 byte representation. |
||||||
|
* |
||||||
|
* In case passed `text` is somehow broken and contains invalid Unicode |
||||||
|
* code points - this method will return empty array. |
||||||
|
* |
||||||
|
* @param text `Text` object to encode. |
||||||
|
* @return UTF8 representation of passed `text` as an array of `byte`s. |
||||||
|
* Empty array if `text == none` or `text` contains invalid Unicode |
||||||
|
* code points. |
||||||
|
*/ |
||||||
|
public final function array<byte> Encode(Text text)
{
    local int           i, nextCodepoint, textLength;
    local array<byte>   buffer;
    if (__().text.IsEmpty(text)) {
        return buffer;  // empty array
    }
    textLength = text.GetLength();
    for (i = 0; i < textLength; i += 1)
    {
        nextCodepoint = text.GetCharacter(i).codePoint;
        if (nextCodepoint < 0 || nextCodepoint > utfLimit4)
        {
            // Outside of known Unicode range.
            // Negative values must be rejected explicitly: otherwise they
            // would pass the `<= utfLimit1` check below and get recorded
            // as a garbage byte.
            // Should not be possible, since `Text` is expected to
            // contain only correct Unicode
            buffer.length = 0;
            break;
        }
        if (nextCodepoint <= utfLimit1)
        {
            // Fits into a single byte as-is
            buffer[buffer.length] = nextCodepoint;
        }
        else if (nextCodepoint <= utfLimit2)
        {
            // Drop 6 bits that will be recorded inside second byte and
            // add 2-byte sequence mask
            buffer[buffer.length] = utfMask2 | (nextCodepoint >> 6);
            // Take only last 6 bits for the second (last) byte
            // + add inner-byte sequence mask
            buffer[buffer.length] = utfMaskIn | (nextCodepoint & lastSixBits);
        }
        else if (nextCodepoint <= utfLimit3)
        {
            // Drop 12 bits that will be recorded inside second and third bytes
            // and add 3-byte sequence mask
            buffer[buffer.length] = utfMask3 | (nextCodepoint >> 12);
            // Drop 6 bits that will be recorded inside third byte and
            // add inner-byte sequence mask
            buffer[buffer.length] =
                utfMaskIn | ((nextCodepoint >> 6) & lastSixBits);
            // Take only last 6 bits for the third (last) byte
            // + add inner-byte sequence mask
            buffer[buffer.length] = utfMaskIn | (nextCodepoint & lastSixBits);
        }
        else
        {
            // Here `nextCodepoint <= utfLimit4` is guaranteed by the range
            // check at the top of the loop.
            // Drop 18 bits that will be recorded inside second, third and
            // fourth bytes, then add 4-byte sequence mask
            buffer[buffer.length] = utfMask4 | (nextCodepoint >> 18);
            // Drop 12 bits that will be recorded inside third and fourth bytes
            // and add inner-byte sequence mask
            buffer[buffer.length] =
                utfMaskIn | ((nextCodepoint >> 12) & lastSixBits);
            // Drop 6 bits that will be recorded inside fourth byte
            // and add inner-byte sequence mask
            buffer[buffer.length] =
                utfMaskIn | ((nextCodepoint >> 6) & lastSixBits);
            // Take only last 6 bits for the fourth (last) byte
            // + add inner-byte sequence mask
            buffer[buffer.length] = utfMaskIn | (nextCodepoint & lastSixBits);
        }
    }
    return buffer;
}
||||||
|
|
||||||
|
defaultproperties
{
    // Largest code points that fit into 1, 2, 3 and 4 bytes respectively
    utfLimit1   = 127
    utfLimit2   = 2047
    utfLimit3   = 65535
    utfLimit4   = 1114111
    // Prefixes of the first byte in 2-, 3- and 4-byte sequences
    utfMask2    = 192   // 1 1 0 0 0 0 0 0
    utfMask3    = 224   // 1 1 1 0 0 0 0 0
    utfMask4    = 240   // 1 1 1 1 0 0 0 0
    // Prefix of every inner (non-first) byte
    utfMaskIn   = 128   // 1 0 0 0 0 0 0 0
    // Keeps only the last 6 bits of an `int`
    lastSixBits = 63    // 0 0 1 1 1 1 1 1
}
Loading…
Reference in new issue