Files
wc3re/decompress.cc

258 lines
6.3 KiB
C++

#include <unordered_map>
#include <algorithm>
#include "common.hh"
#include "compiler.hh"
#include "util.hh"
#include "decompress.hh"
/**
 * Decompress an LZ-coded byte stream into a freshly allocated buffer.
 *
 * Every command starts with a control byte whose top bits select its form:
 *  - 0xxxxxxx (1 extra byte):   0..3 literals, then copy 3..10 bytes from
 *                               1..1024 bytes back in the output
 *  - 10xxxxxx (2 extra bytes):  0..3 literals, then copy 4..67 bytes from
 *                               1..16384 bytes back
 *  - 110xxxxx (3 extra bytes):  0..3 literals, then copy 5..1028 bytes from
 *                               1..131072 bytes back
 *  - 0xe0..0xfb:                literal run of 4..112 bytes
 *  - 0xfc..0xff:                0..3 trailing literals, then end of stream
 *
 * Input that runs out exactly at a command boundary without an
 * end-of-stream command is accepted; whatever was decoded is returned.
 *
 * @param data         compressed input
 * @param len          number of bytes readable at data
 * @param retlen_hint  expected decompressed size, used only to reserve
 *                     capacity; 0 means unknown
 * @return the decompressed bytes
 * @throws FormatException if a command needs more input than is left, or a
 *         copy refers to a position before the start of the output
 */
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint)
{
    std::vector<uint8_t> ret;
    if (retlen_hint)
        ret.reserve(retlen_hint);

    size_t pos = 0;
    while (pos < len) {
        uint8_t const b = data[pos++];
        // Invariant from here on: pos <= len, so len-pos cannot wrap.

        if ((b&0xe0) == 0xe0) {
            size_t const runLen = (b&0x1f)*4+4;
            if (runLen > 0x70) {
                // Control bytes 0xfc..0xff: final 0..3 literals, then stop.
                size_t const tailLen = b&0x3;
                if (tailLen > len-pos)
                    throw FormatException{"Compressed stream overrun"};
                ret.insert(ret.end(), data+pos, data+pos+tailLen);
                pos += tailLen;
#ifndef NDEBUG
                if (pos < len)
                    printf("%zu unparsed bytes in compressed data\n", len-pos);
#endif
                break;
            }
            // Plain literal run.
            if (runLen > len-pos)
                throw FormatException{"Compressed stream overrun"};
            ret.insert(ret.end(), data+pos, data+pos+runLen);
            pos += runLen;
            continue;
        }

        // Literal-then-copy command; exactly one of the three forms applies
        // because at least one of the top three bits is clear here.
        size_t litLen = 0, replSize = 0, replOfs = 0;
        if (!(b&0x80)) {
            if (len-pos < 1)
                throw FormatException{"Compressed stream overrun"};
            uint8_t const ofs = data[pos++];
            litLen = b&0x3;
            replSize = ((b&0x1c)>>2) + 3;
            replOfs = ((b&0x60)<<3)+ofs+1;
        } else if (!(b&0x40)) {
            if (len-pos < 2)
                throw FormatException{"Compressed stream overrun"};
            uint8_t const b1 = data[pos++];
            uint8_t const b2 = data[pos++];
            litLen = (b1&0xc0)>>6;
            replSize = (b&0x3f)+4;
            replOfs = ((b1&0x3f)<<8)+b2+1;
        } else {
            if (len-pos < 3)
                throw FormatException{"Compressed stream overrun"};
            uint8_t const b1 = data[pos++];
            uint8_t const b2 = data[pos++];
            uint8_t const b3 = data[pos++];
            litLen = b&0x3;
            replSize = b3+5+((b&0xc)<<6);
            replOfs = ((b&0x10)<<12)+1+(b1<<8)+b2;
        }

        // Literals may end exactly at the end of the input; only reading
        // past it is an error.
        if (litLen > len-pos)
            throw FormatException{"Compressed stream overrun"};
        ret.insert(ret.end(), data+pos, data+pos+litLen);
        pos += litLen;

        if (replOfs > ret.size())
            throw FormatException{"Replication offset exceeds buffer"};
        size_t const start = ret.size()-replOfs;
        // Byte-by-byte on purpose: when replOfs < replSize the source range
        // overlaps the bytes being appended, which repeats the pattern.
        for (size_t i = 0; i < replSize; ++i) {
            uint8_t const v = ret[start+i];
            ret.push_back(v);
        }
    }
    return ret;
}
/**
 * Decompress an LZ-coded byte stream into a caller-supplied buffer.
 *
 * Every command starts with a control byte whose top bits select its form:
 *  - 0xxxxxxx (1 extra byte):   0..3 literals, then copy 3..10 bytes from
 *                               1..1024 bytes back in the output
 *  - 10xxxxxx (2 extra bytes):  0..3 literals, then copy 4..67 bytes from
 *                               1..16384 bytes back
 *  - 110xxxxx (3 extra bytes):  0..3 literals, then copy 5..1028 bytes from
 *                               1..131072 bytes back
 *  - 0xe0..0xfb:                literal run of 4..112 bytes
 *  - 0xfc..0xff:                0..3 trailing literals, then end of stream
 *
 * Input that runs out exactly at a command boundary without an
 * end-of-stream command is accepted.
 *
 * @param data    compressed input; must not overlap out
 * @param len     number of bytes readable at data
 * @param out     destination buffer
 * @param maxOut  capacity of out in bytes
 * @return number of bytes written to out
 * @throws FormatException if a command needs more input than is left, or a
 *         copy refers to a position before the start of the output
 * @throws Exception if the decompressed data would not fit into maxOut bytes
 */
size_t decompressLZInto(uint8_t const* RESTRICT data, size_t len, uint8_t * RESTRICT out, size_t maxOut)
{
    size_t pos = 0, outPos = 0;
    while (pos < len) {
        uint8_t const b = data[pos++];
        // Invariants from here on: pos <= len and outPos <= maxOut, so the
        // subtractions in the bounds checks below cannot wrap.

        if ((b&0xe0) == 0xe0) {
            size_t const runLen = (b&0x1f)*4+4;
            if (runLen > 0x70) {
                // Control bytes 0xfc..0xff: final 0..3 literals, then stop.
                size_t const tailLen = b&0x3;
                if (tailLen > len-pos)
                    throw FormatException{"Compressed stream overrun"};
                if (tailLen > maxOut-outPos)
                    throw Exception{"Output buffer overrun"};
                std::copy(data+pos, data+pos+tailLen, out+outPos);
                pos += tailLen;
                outPos += tailLen;
#ifndef NDEBUG
                if (pos < len)
                    printf("%zu unparsed bytes in compressed data\n", len-pos);
#endif
                break;
            }
            // Plain literal run.
            if (runLen > len-pos)
                throw FormatException{"Compressed stream overrun"};
            if (runLen > maxOut-outPos)
                throw Exception{"Output buffer overrun"};
            std::copy(data+pos, data+pos+runLen, out+outPos);
            pos += runLen;
            outPos += runLen;
            continue;
        }

        // Literal-then-copy command; exactly one of the three forms applies
        // because at least one of the top three bits is clear here.
        size_t litLen = 0, replSize = 0, replOfs = 0;
        if (!(b&0x80)) {
            if (len-pos < 1)
                throw FormatException{"Compressed stream overrun"};
            uint8_t const ofs = data[pos++];
            litLen = b&0x3;
            replSize = ((b&0x1c)>>2) + 3;
            replOfs = ((b&0x60)<<3)+ofs+1;
        } else if (!(b&0x40)) {
            if (len-pos < 2)
                throw FormatException{"Compressed stream overrun"};
            uint8_t const b1 = data[pos++];
            uint8_t const b2 = data[pos++];
            litLen = (b1&0xc0)>>6;
            replSize = (b&0x3f)+4;
            replOfs = ((b1&0x3f)<<8)+b2+1;
        } else {
            if (len-pos < 3)
                throw FormatException{"Compressed stream overrun"};
            uint8_t const b1 = data[pos++];
            uint8_t const b2 = data[pos++];
            uint8_t const b3 = data[pos++];
            litLen = b&0x3;
            replSize = b3+5+((b&0xc)<<6);
            replOfs = ((b&0x10)<<12)+1+(b1<<8)+b2;
        }

        // Literals may end exactly at the end of the input; only reading
        // past it is an error.
        if (litLen > len-pos)
            throw FormatException{"Compressed stream overrun"};
        if (litLen > maxOut-outPos)
            throw Exception{"Output buffer overrun"};
        std::copy(data+pos, data+pos+litLen, out+outPos);
        pos += litLen;
        outPos += litLen;

        if (replOfs > outPos)
            throw FormatException{"Replication offset exceeds buffer"};
        if (replSize > maxOut-outPos)
            throw Exception{"Output buffer overrun"};
        size_t const start = outPos-replOfs;
        // Byte-by-byte on purpose: when replOfs < replSize the source range
        // overlaps the bytes being written, which repeats the pattern.
        for (size_t i = 0; i < replSize; ++i)
            out[outPos++] = out[start+i];
    }
    return outPos;
}
/**
 * Decompress a variable-width LZW code stream.
 *
 * Codes are read through BitReader, starting at 9 bits wide. The width grows
 * by one bit (up to 12) as soon as the next free dictionary code no longer
 * fits in the current width. Code meanings:
 *  - 0x000..0x0ff: literal byte
 *  - 0x100:        restart (drop the dictionary, back to 9-bit codes)
 *  - 0x101:        end of stream
 *  - 0x102..:      dictionary entry; the first unassigned code is allowed
 *                  and means "previous string plus its own first byte"
 *
 * NOTE(review): what happens when the input ends before a 0x101 code is
 * decided by BitReader::getBits, which is not visible here.
 *
 * @param data         compressed input
 * @param len          number of bytes readable at data
 * @param retlen_hint  expected decompressed size, used only to reserve
 *                     capacity; 0 means unknown
 * @return the decompressed bytes
 * @throws Exception if a code lies beyond the next free dictionary slot, or
 *         refers to the previous string when there is none
 */
std::vector<uint8_t> decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint)
{
    const unsigned DICT_OFS = 0x102; // first code that names a dictionary entry
    BitReader bitReader(data, len);

    std::vector<uint8_t> ret;
    if (retlen_hint)
        ret.reserve(retlen_hint);

    std::vector<std::vector<uint8_t> > dict;
    std::vector<uint8_t> cur_string; // string emitted for the previous code
    std::vector<uint8_t> to_dict;    // pending new entry; empty means none
    unsigned code_width = 9;
    unsigned dict_head = DICT_OFS;   // next code to be assigned

    while (true) {
        unsigned bits = bitReader.getBits(code_width);
#ifdef LZWDEBUG
        printf("Code: %x\n", bits);
#endif
        if (bits == 0x101) { // termination code
            break;
        } else if (bits == 0x100) { // Restart code
            code_width = 9;
            dict_head = DICT_OFS;
            dict.clear();
            cur_string.clear();
            to_dict.clear();
            continue;
        }

        // Data
        if (bits <= 0xff) {
#ifdef LZWDEBUG
            printf("Verbatim: %x\n", bits);
#endif
            // The very first code after start/restart has no predecessor and
            // therefore creates no dictionary entry.
            if (!cur_string.empty()) {
                to_dict = std::move(cur_string);
                to_dict.push_back(static_cast<uint8_t>(bits));
            }
            cur_string = {static_cast<uint8_t>(bits)};
        } else if (bits < dict_head) {
#ifdef LZWDEBUG
            printf("Dict: %x: ", bits);
            std::for_each(dict[bits-DICT_OFS].begin(), dict[bits-DICT_OFS].end(),
                          [](uint8_t const& d) { printf("%.2x ", d); });
            printf("\n");
#endif
            // New entry: previous string plus first byte of the current one.
            to_dict = std::move(cur_string);
            to_dict.push_back(dict[bits-DICT_OFS][0]);
            cur_string = dict[bits-DICT_OFS];
        } else {
            if (bits != dict_head)
                throw Exception("WTF?! " + std::to_string(dict_head));
            // The code names the entry that is only being created now, so it
            // must be the previous string extended by its own first byte.
            // Without a previous string the stream is malformed; reading
            // cur_string[0] would be undefined behaviour.
            if (cur_string.empty())
                throw Exception{"LZW code refers to previous string before any data"};
            uint8_t const first = cur_string[0];
            cur_string.push_back(first);
#ifdef LZWDEBUG
            printf("Dict-unknown: %x: ", bits);
            std::for_each(cur_string.begin(), cur_string.end(),
                          [](uint8_t const& d) { printf("%.2x ", d); });
            printf("\n");
#endif
            to_dict = cur_string;
        }

        ret.insert(ret.end(), cur_string.begin(), cur_string.end());

        if (!to_dict.empty()) {
            dict.emplace_back(std::move(to_dict));
            to_dict.clear(); // make the "nothing pending" state explicit
            ++dict_head;
        }
        // Widen the codes once the next code to assign needs another bit.
        if (code_width < 12 && (dict_head>>code_width))
            ++code_width;
    }
    return ret;
}