Skip to content

Commit

Permalink
Tried adding SQFC decompression (doesn't work)
Browse files Browse the repository at this point in the history
  • Loading branch information
dedmen committed Feb 13, 2024
1 parent f2ea896 commit b4b7f64
Show file tree
Hide file tree
Showing 3 changed files with 341 additions and 3 deletions.
249 changes: 249 additions & 0 deletions src/lzokay_stream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
// Not pretty, but lzokay needs to read its compressed input from a stream here.
// This is ~99% the code of the lzokay library from the lib folder, just with std::istream used as input.

#include <istream>

#include "lzokay.hpp"
using namespace lzokay;


/* NEEDS_IN(count): input bounds check used inside decompressStream.
 *
 * Expects `inp` (std::istream&), `inp_end` (std::size_t, compared against the
 * absolute position reported by inp.tellg()), `outp`, `dst` and `dst_size` to
 * be in scope. If fewer than `count` bytes remain before `inp_end`, it stores
 * the number of bytes produced so far in `dst_size` and returns
 * EResult::InputOverrun from the enclosing function.
 *
 * tellg() reports -1 once the stream has failed; that is treated as an overrun
 * instead of letting "-1 + count" slip past the limit. The remaining-bytes
 * comparison is written as a subtraction so a huge `count` cannot wrap around.
 * The macro expands to a braced block and is used without a trailing ';'.
 */
#define NEEDS_IN(count) { \
const std::streamoff curTell = static_cast<std::streamoff>(inp.tellg()); \
if (curTell < 0 || \
static_cast<std::size_t>(curTell) > inp_end || \
static_cast<std::size_t>(count) > inp_end - static_cast<std::size_t>(curTell)) { \
dst_size = outp - dst; \
return EResult::InputOverrun; \
}}

/* NEEDS_OUT(count): output bounds check used inside decompressStream.
 *
 * Expects `outp`, `outp_end`, `dst` (uint8_t*) and `dst_size` to be in scope.
 * If fewer than `count` bytes of room remain between `outp` and `outp_end`, it
 * stores the number of bytes produced so far in `dst_size` and returns
 * EResult::OutputOverrun from the enclosing function.
 *
 * The check compares `count` against the remaining room instead of forming
 * `outp + count`, because that pointer addition can overflow for a huge
 * (corrupt) length and make the check pass. The macro expands to a braced
 * block so it cannot capture a following `else`; it is used without a
 * trailing ';'.
 */
#define NEEDS_OUT(count) { \
if (static_cast<std::size_t>(count) > static_cast<std::size_t>(outp_end - outp)) { \
dst_size = outp - dst; \
return EResult::OutputOverrun; \
}}

/* CONSUME_ZERO_BYTE_LENGTH: skips the run of zero bytes at the current read
 * position of `inp` and declares a new local `offset` (std::size_t) in the
 * enclosing scope holding how many bytes were skipped, measured as the
 * difference of inp.tellg() before and after.
 *
 * Expects `inp`, `outp`, `dst`, `dst_size` and `Max255Count` to be in scope.
 * If the run is longer than Max255Count, it stores the number of bytes
 * produced so far in `dst_size` and returns EResult::Error from the enclosing
 * function. Because it declares `offset`, it can appear only once per scope.
 */
#define CONSUME_ZERO_BYTE_LENGTH \
std::size_t offset; \
{ \
const auto zero_run_begin = inp.tellg(); \
for (; inp.peek() == 0; inp.get()) {} \
offset = inp.tellg() - zero_run_begin; \
if (!(offset <= Max255Count)) { \
dst_size = outp - dst; \
return EResult::Error; \
} \
}

/* Instruction-class marker bits. decompressStream tests the instruction byte
 * against M3Marker (bit 5) and M4Marker (bit 4); M1Marker and M2Marker are
 * not referenced by the decoder in this file.
 */
constexpr uint32_t M1Marker = 0;
constexpr uint32_t M2Marker = 1u << 6;
constexpr uint32_t M3Marker = 1u << 5;
constexpr uint32_t M4Marker = 1u << 4;

/* Longest zero-byte run CONSUME_ZERO_BYTE_LENGTH accepts. It is the largest
 * std::size_t value divided by 255, minus 2, so that `run * 255` plus the
 * small constants added to it still fits in a std::size_t.
 */
constexpr std::size_t Max255Count = (static_cast<std::size_t>(-1) / 255) - 2;

/* Reads two bytes from `p` and returns them as a little-endian 16-bit value
 * (first byte is the low byte), advancing the stream by the bytes read.
 *
 * The value is assembled from the individual bytes, so the result does not
 * depend on the host byte order. Bytes that could not be read (short read or
 * failed stream) count as 0, so the result is never uninitialised; the stream's
 * fail state still tells the caller that the read was incomplete.
 */
static uint16_t get_le16(std::istream& p) {
    unsigned char bytes[2] = {0, 0};
    p.read(reinterpret_cast<char*>(bytes), 2);
    return static_cast<uint16_t>(bytes[0] | (bytes[1] << 8));
}


/* LZO decompressor that pulls its compressed input from a std::istream and
 * writes the decompressed bytes into a caller-provided buffer.
 *
 * src            Stream positioned at the first compressed byte.
 * src_size       Input limit. It is copied into inp_end and compared against
 *                the absolute position reported by src.tellg() (see NEEDS_IN
 *                and the final checks), so it acts as the stream offset at
 *                which the compressed data ends.
 *                NOTE(review): the initial `src_size < 3` test treats it as a
 *                byte count; the two readings only agree when the compressed
 *                data starts at stream offset 0 - confirm against callers.
 * dst            Output buffer.
 * init_dst_size  Capacity of dst in bytes.
 * dst_size       Out: number of bytes written to dst. It is set on every
 *                return path (0 when src_size < 3).
 *
 * Returns EResult::Success when the terminating M4 instruction was found and
 * the stream position equals src_size, EResult::InputNotConsumed when the
 * terminator was found before src_size, EResult::InputOverrun /
 * EResult::OutputOverrun when a bounds check fails, EResult::LookbehindOverrun
 * when a back-reference points before dst, and EResult::Error for an
 * over-long zero run or a loop exit that did not come from the 3-byte
 * terminator.
 */
EResult decompressStream(std::istream& src, std::size_t src_size,
uint8_t* dst, std::size_t init_dst_size,
std::size_t& dst_size) {
dst_size = init_dst_size;

/* Reject inputs shorter than 3 bytes up front. */
if (src_size < 3) {
dst_size = 0;
return EResult::InputOverrun;
}

std::istream& inp = src;
size_t inp_end = src_size; /* limit that inp.tellg() is checked against */
uint8_t* outp = dst; /* next output byte to write */
uint8_t* outp_end = dst + dst_size;
uint8_t* lbcur; /* source of the current lookbehind (back-reference) copy */
std::size_t lblen; /* length of the current lookbehind copy */
std::size_t state = 0; /* literals copied by the previous instruction (4 = "4 or more") */
std::size_t nstate = 0; /* literals to copy after the current instruction */

/* First byte encoding */
if (inp.peek() >= 22) {
/* 22..255 : copy literal string
* length = (byte - 17) = 4..238
* state = 4 [ don't copy extra literals ]
* skip byte
*/
std::size_t len = inp.get() - uint8_t(17);
NEEDS_IN(len)
NEEDS_OUT(len)
for (std::size_t i = 0; i < len; ++i)
*outp++ = inp.get();
state = 4;
} else if (inp.peek() >= 18) {
/* 18..21 : copy 0..3 literals
* state = (byte - 17) = 0..3 [ copy <state> literals ]
* skip byte
*/
nstate = inp.get() - uint8_t(17);
state = nstate;
NEEDS_IN(nstate)
NEEDS_OUT(nstate)
for (std::size_t i = 0; i < nstate; ++i)
*outp++ = inp.get();
}
/* 0..17 : follow regular instruction encoding, see below. It is worth
* noting that codes 16 and 17 will represent a block copy from
* the dictionary which is empty, and that they will always be
* invalid at this place.
*/

/* Main loop: one instruction per iteration. The only way out of the loop,
 * other than an error return, is the `break` in the M4 branch.
 */
while (true) {
NEEDS_IN(1)
uint8_t inst = inp.get();
if (inst & 0xC0) {
/* [M2]
* 1 L L D D D S S (128..255)
* Copy 5-8 bytes from block within 2kB distance
* state = S (copy S literals after this block)
* length = 5 + L
* Always followed by exactly one byte : H H H H H H H H
* distance = (H << 3) + D + 1
*
* 0 1 L D D D S S (64..127)
* Copy 3-4 bytes from block within 2kB distance
* state = S (copy S literals after this block)
* length = 3 + L
* Always followed by exactly one byte : H H H H H H H H
* distance = (H << 3) + D + 1
*/
NEEDS_IN(1)
lbcur = outp - ((inp.get() << 3) + ((inst >> 2) & 0x7) + 1);
lblen = std::size_t(inst >> 5) + 1;
nstate = inst & uint8_t(0x3);
} else if (inst & M3Marker) {
/* [M3]
* 0 0 1 L L L L L (32..63)
* Copy of small block within 16kB distance (preferably less than 34B)
* length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
* Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
* distance = D + 1
* state = S (copy S literals after this block)
*/
lblen = std::size_t(inst & uint8_t(0x1f)) + 2;
if (lblen == 2) {
CONSUME_ZERO_BYTE_LENGTH
NEEDS_IN(1)
lblen += offset * 255 + 31 + inp.get();
}
NEEDS_IN(2)
nstate = get_le16(inp);
/* No explicit `inp += 2` as in the pointer-based code: get_le16 reads the two bytes from the stream. */
lbcur = outp - ((nstate >> 2) + 1);
nstate &= 0x3;
} else if (inst & M4Marker) {
/* [M4]
* 0 0 0 1 H L L L (16..31)
* Copy of a block within 16..48kB distance (preferably less than 10B)
* length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
* Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
* distance = 16384 + (H << 14) + D
* state = S (copy S literals after this block)
* End of stream is reached if distance == 16384
*/
lblen = std::size_t(inst & uint8_t(0x7)) + 2;
if (lblen == 2) {
CONSUME_ZERO_BYTE_LENGTH
NEEDS_IN(1)
lblen += offset * 255 + 7 + inp.get();
}
NEEDS_IN(2)
nstate = get_le16(inp);
/* No explicit `inp += 2` as in the pointer-based code: get_le16 reads the two bytes from the stream. */
lbcur = outp - (((inst & 0x8) << 11) + (nstate >> 2));
nstate &= 0x3;
if (lbcur == outp)
break; /* Stream finished */
lbcur -= 16384;
} else {
/* [M1] Depends on the number of literals copied by the last instruction. */
if (state == 0) {
/* If last instruction did not copy any literal (state == 0), this
* encoding will be a copy of 4 or more literal, and must be interpreted
* like this :
*
* 0 0 0 0 L L L L (0..15) : copy long literal string
* length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
* state = 4 (no extra literals are copied)
*/
std::size_t len = inst + 3;
if (len == 3) {
CONSUME_ZERO_BYTE_LENGTH
NEEDS_IN(1)
len += offset * 255 + 15 + inp.get();
}
/* copy_literal_run */
NEEDS_IN(len)
NEEDS_OUT(len)
for (std::size_t i = 0; i < len; ++i)
*outp++ = inp.get();
state = 4;
continue;
} else if (state != 4) {
/* If last instruction used to copy between 1 to 3 literals (encoded in
* the instruction's opcode or distance), the instruction is a copy of a
* 2-byte block from the dictionary within a 1kB distance. It is worth
* noting that this instruction provides little savings since it uses 2
* bytes to encode a copy of 2 other bytes but it encodes the number of
* following literals for free. It must be interpreted like this :
*
* 0 0 0 0 D D S S (0..15) : copy 2 bytes from <= 1kB distance
* length = 2
* state = S (copy S literals after this block)
* Always followed by exactly one byte : H H H H H H H H
* distance = (H << 2) + D + 1
*/
NEEDS_IN(1)
nstate = inst & uint8_t(0x3);
lbcur = outp - ((inst >> 2) + (inp.get() << 2) + 1);
lblen = 2;
} else {
/* If last instruction used to copy 4 or more literals (as detected by
* state == 4), the instruction becomes a copy of a 3-byte block from the
* dictionary from a 2..3kB distance, and must be interpreted like this :
*
* 0 0 0 0 D D S S (0..15) : copy 3 bytes from 2..3 kB distance
* length = 3
* state = S (copy S literals after this block)
* Always followed by exactly one byte : H H H H H H H H
* distance = (H << 2) + D + 2049
*/
NEEDS_IN(1)
nstate = inst & uint8_t(0x3);
lbcur = outp - ((inst >> 2) + (inp.get() << 2) + 2049);
lblen = 3;
}
}
/* A back-reference must not start before the beginning of the output buffer. */
if (lbcur < dst) {
dst_size = outp - dst;
return EResult::LookbehindOverrun;
}
NEEDS_IN(nstate)
NEEDS_OUT(lblen + nstate)
/* Copy lookbehind (byte by byte; source and destination ranges may overlap) */
for (std::size_t i = 0; i < lblen; ++i)
*outp++ = *lbcur++;
state = nstate;
/* Copy literal */
for (std::size_t i = 0; i < nstate; ++i)
*outp++ = inp.get();
}

dst_size = outp - dst;
if (lblen != 3) /* Ensure terminating M4 was encountered */
return EResult::Error;
/* Classify the result by comparing the stream position with the input limit. */
if (inp.tellg() == inp_end)
return EResult::Success;
else if (inp.tellg() < inp_end)
return EResult::InputNotConsumed;
else
return EResult::InputOverrun;
}
19 changes: 19 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,29 @@ void processFile(ScriptCompiler& comp, std::filesystem::path path) {
catch (std::runtime_error& err) {

}
}

// Reads a compiled SQFC file from inputPath, deserializes it with
// ScriptSerializer::binaryToCompiled, serializes it again with
// ScriptSerializer::compiledToBinary and writes the resulting bytes to outputPath.
//
// Throws std::runtime_error if either file cannot be opened, instead of
// parsing a dead stream or silently writing nothing.
void DecompressSQFC(std::filesystem::path inputPath, std::filesystem::path outputPath)
{
    // To use this, also need to edit ScriptSerializer::compiledToBinary and disable compressed serialization

    std::ifstream inputFile(inputPath, std::ifstream::binary);
    if (!inputFile)
        throw std::runtime_error("DecompressSQFC: cannot open input file " + inputPath.string());

    const auto compiledData = ScriptSerializer::binaryToCompiled(inputFile);

    std::stringstream output(std::stringstream::binary | std::stringstream::out);
    ScriptSerializer::compiledToBinary(compiledData, output);

    // The serialized bytes are written as-is (base64_encode is not applied).
    const auto encoded = output.str();

    std::ofstream outputFile(outputPath, std::ofstream::binary);
    if (!outputFile)
        throw std::runtime_error("DecompressSQFC: cannot open output file " + outputPath.string());

    outputFile.write(encoded.data(), static_cast<std::streamsize>(encoded.length()));
    outputFile.flush();
}


int main(int argc, char* argv[]) {

if (std::filesystem::exists("sqfc.lua")) {
Expand Down
76 changes: 73 additions & 3 deletions src/scriptSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <functional>
#include <zstd.h>
#include <sstream>
#include <strstream>

// Compression level used by this file.
// NOTE(review): presumably the level handed to zstd (<zstd.h> is included above) - confirm at the use site.
static constexpr int compressionLevel{22};

Expand Down Expand Up @@ -205,7 +206,7 @@ void ScriptSerializer::compiledToBinary(const CompiledCodeData& code, std::ostre
__debugbreak();

writeT(static_cast<uint32_t>(bufferContent.size()), output); // uncompressed size
writeT(static_cast<uint8_t>(2), output); // compression method, always 2
writeT(static_cast<uint8_t>(2), output); // compression method, always 2 (That is RV engine stuff)
output.write((const char*)compressed.get(), compressed_size);
} else {
writeT(static_cast<uint8_t>(SerializedBlockType::constant), output);
Expand All @@ -227,6 +228,58 @@ void ScriptSerializer::compiledToBinary(const CompiledCodeData& code, std::ostre
output.flush();
}

// Stream-reading lzokay decompressor; the definition lives in lzokay_stream.cpp.
lzokay::EResult decompressStream(
    std::istream& src,
    std::size_t src_size,
    uint8_t* dst,
    std::size_t init_dst_size,
    std::size_t& dst_size);

// Owns a decompressed byte buffer and exposes it as a readable, seekable std::istream.
//
// `stream` reads through `buf`, and `buf` points into `uncompressedData`, so an
// instance must never be copied or moved. Members are declared in dependency
// order because they are initialised in declaration order.
struct DecompressedData
{
    // Minimal read-only stream buffer over memory owned by someone else.
    // Replaces the deprecated std::strstreambuf, whose (pointer, 0) constructor
    // treats the pointer as a NUL-terminated string and therefore cannot
    // represent an empty buffer safely.
    class MemoryReadBuf : public std::streambuf
    {
    public:
        MemoryReadBuf(uint8_t* data, std::size_t size)
        {
            char* const begin = reinterpret_cast<char*>(data);
            setg(begin, begin, begin + size);
        }

    protected:
        // Supports tellg()/seekg(off, dir) on the owning istream.
        pos_type seekoff(off_type off, std::ios_base::seekdir dir,
                         std::ios_base::openmode which = std::ios_base::in) override
        {
            const pos_type failure = pos_type(off_type(-1));
            if (!(which & std::ios_base::in))
                return failure;

            const off_type length = static_cast<off_type>(egptr() - eback());
            off_type base = 0;
            if (dir == std::ios_base::beg)
                base = 0;
            else if (dir == std::ios_base::cur)
                base = static_cast<off_type>(gptr() - eback());
            else if (dir == std::ios_base::end)
                base = length;
            else
                return failure;

            const off_type target = base + off;
            if (target < 0 || target > length)
                return failure;

            setg(eback(), eback() + target, egptr());
            return pos_type(target);
        }

        // Supports seekg(pos) on the owning istream.
        pos_type seekpos(pos_type pos, std::ios_base::openmode which = std::ios_base::in) override
        {
            return seekoff(off_type(pos), std::ios_base::beg, which);
        }
    };

    // Intentionally not `explicit`: DecompressFromStream builds its return
    // value with `return { std::move(vector) };`, which needs a converting constructor.
    DecompressedData(std::vector<uint8_t>&& data):
        uncompressedData(std::move(data)),
        buf(uncompressedData.data(), uncompressedData.size()),
        stream(&buf)
    {}

    // Copying would leave `stream`/`buf` pointing into another object's storage.
    DecompressedData(const DecompressedData&) = delete;
    DecompressedData& operator=(const DecompressedData&) = delete;

    std::vector<uint8_t> uncompressedData;
    MemoryReadBuf buf;
    std::istream stream;
};

//#TODO make the compress write also a method

// Reads one compressed block from `input` and returns the decompressed bytes
// wrapped in a DecompressedData (buffer plus an istream over it).
//
// Layout read here: uint32 uncompressed size, uint8 compression method
// (always 2), then the compressed bytes. No compressed length is read, so the
// end of the stream is handed to decompressStream as the input limit.
//
// DecompressedData holds a stream that points into its own members, so the
// result has to be constructed directly in the caller's object; returning a
// braced-init-list does exactly that. Earlier revisions depended on the
// compiler applying RVO here.
DecompressedData DecompressFromStream(std::istream& input)
{
    const std::size_t declaredSize = readT<uint32_t>(input); // uncompressed size
    readT<uint8_t>(input); // compression method, always 2

    // Output buffer sized from the header.
    std::vector<uint8_t> uncompressedData(declaredSize);

    // Find the absolute end offset of the stream, then return to the compressed data.
    // std::ios_base::end / seekg(pos) are used rather than the C macros
    // SEEK_END / SEEK_SET, which are ints and do not convert to
    // std::ios_base::seekdir on every standard library.
    const auto pos = input.tellg();
    input.seekg(0, std::ios_base::end);
    const std::streamoff streamEnd = input.tellg();
    input.seekg(pos);

    // Separate out-variable, so the capacity argument and the written-size
    // result do not alias the same object.
    std::size_t writtenSize = 0;
    const auto error = decompressStream(input, static_cast<std::size_t>(streamEnd),
                                        uncompressedData.data(), declaredSize, writtenSize);
    if (error < lzokay::EResult::Success)
        __debugbreak();

    return { std::move(uncompressedData) };
}


CompiledCodeData ScriptSerializer::binaryToCompiled(std::istream& input) {
CompiledCodeData output;
output.version = readT<uint32_t>(input);
Expand All @@ -241,6 +294,10 @@ CompiledCodeData ScriptSerializer::binaryToCompiled(std::istream& input) {
case SerializedBlockType::constant: {
readConstants(output, input);
} break;
case SerializedBlockType::constantCompressed: {
auto decompressedData = DecompressFromStream(input);
readConstants(output, decompressedData.stream);
} break;
case SerializedBlockType::locationInfo: {
auto locCount = readT<uint16_t>(input);

Expand All @@ -251,6 +308,19 @@ CompiledCodeData ScriptSerializer::binaryToCompiled(std::istream& input) {
case SerializedBlockType::code: {
output.codeIndex = readT<uint64_t>(input);
} break;
case SerializedBlockType::codeDebug: {
__debugbreak(); // not implemented
} break;
case SerializedBlockType::commandNameDirectory: {
// Compressed buffer

auto decompressedData = DecompressFromStream(input);

auto numCommandNames = readT<uint16_t>(decompressedData.stream);
output.commandNameDirectory.reserve(numCommandNames);
for (int i = 0; i < numCommandNames; ++i)
output.commandNameDirectory.emplace_back(readString(decompressedData.stream));
} break;
}
}
return output;
Expand Down Expand Up @@ -639,8 +709,8 @@ std::vector<char> ScriptSerializer::decompressDataDictionary(const std::vector<c
}

STRINGTYPE ScriptSerializer::readString(std::istream& input) {
uint32_t length;
input.read(reinterpret_cast<char*>(&length) + 1, 3);
uint32_t length{}; // length is actually uint24
input.read(reinterpret_cast<char*>(&length), 3);


if constexpr (std::is_same_v<STRINGTYPE, std::string>) {
Expand Down

0 comments on commit b4b7f64

Please sign in to comment.