258 lines
6.3 KiB
C++
258 lines
6.3 KiB
C++
#include <unordered_map>
|
|
#include <algorithm>
|
|
|
|
#include "common.hh"
|
|
#include "compiler.hh"
|
|
#include "util.hh"
|
|
#include "decompress.hh"
|
|
|
|
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint)
|
|
{
|
|
std::vector<uint8_t> ret;
|
|
if (retlen_hint)
|
|
ret.reserve(retlen_hint);
|
|
|
|
size_t pos = 0;
|
|
while (pos < len) {
|
|
uint8_t b = *(data+pos++);
|
|
if ((b&0xe0) != 0xe0) {
|
|
unsigned size = 0, replSize = 0;
|
|
unsigned replOfs = 0;
|
|
if (!(b&0x80)) {
|
|
if (pos >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
|
|
uint8_t ofs = *(data+pos++);
|
|
size = b&0x3;
|
|
|
|
replSize = ((b&0x1c)>>2) + 3;
|
|
replOfs = ((b&0x60)<<3)+ofs+1;
|
|
} else if (!(b&0x40)) {
|
|
if (pos+1 >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
|
|
uint8_t b1 = *(data+pos++);
|
|
uint8_t b2 = *(data+pos++);
|
|
|
|
size = (b1&0xc0)>>6;
|
|
|
|
replSize = (b&0x3f)+4;
|
|
replOfs = ((b1&0x3f)<<8)+b2+1;
|
|
} else if (!(b&0x20)) {
|
|
if (pos+2 >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
|
|
uint8_t b1 = *(data+pos++);
|
|
uint8_t b2 = *(data+pos++);
|
|
uint8_t b3 = *(data+pos++);
|
|
|
|
size = b&0x3;
|
|
|
|
replSize = b3+5+((b&0xc)<<6);
|
|
replOfs = ((b&0x10)<<12)+1+(b1<<8)+b2;
|
|
}
|
|
if (pos+size >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
std::copy(data+pos, data+pos+size,
|
|
std::back_inserter(ret));
|
|
pos += size;
|
|
|
|
if (replOfs > ret.size())
|
|
throw FormatException{"Replication offset exceeds buffer"};
|
|
unsigned start = ret.size()-replOfs;
|
|
for (unsigned i = 0;i < replSize;++i)
|
|
ret.push_back(ret[start+i]);
|
|
} else {
|
|
unsigned size = (b&0x1f)*4+4;
|
|
if (size > 0x70) {
|
|
if (pos+(b&0x3) > len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
std::copy(data+pos, data+pos+(b&0x3),
|
|
std::back_inserter(ret));
|
|
pos += (b&0x3);
|
|
#ifndef NDEBUG
|
|
if (pos < len)
|
|
printf("%lu unparsed bytes in compressed data\n", len-pos);
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
if (pos+size > len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
std::copy(data+pos, data+pos+size,
|
|
std::back_inserter(ret));
|
|
pos += size;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
size_t decompressLZInto(uint8_t const* RESTRICT data, size_t len, uint8_t * RESTRICT out, size_t maxOut)
|
|
{
|
|
size_t pos = 0, outPos = 0;
|
|
while (pos < len) {
|
|
uint8_t b = *(data+pos++);
|
|
if (!((b&0xe0)==0xe0)) {
|
|
unsigned size = 0, replSize = 0;
|
|
unsigned replOfs = 0;
|
|
if (!(b&0x80)) {
|
|
if (pos >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
|
|
uint8_t ofs = *(data+pos++);
|
|
size = b&0x3;
|
|
|
|
replSize = ((b&0x1c)>>2) + 3;
|
|
replOfs = ((b&0x60)<<3)+ofs+1;
|
|
} else if (!(b&0x40)) {
|
|
if (pos+1 >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
|
|
uint8_t b1 = *(data+pos++);
|
|
uint8_t b2 = *(data+pos++);
|
|
|
|
size = (b1&0xc0)>>6;
|
|
|
|
replSize = (b&0x3f)+4;
|
|
replOfs = ((b1&0x3f)<<8)+b2+1;
|
|
} else if (!(b&0x20)) {
|
|
if (pos+2 >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
|
|
uint8_t b1 = *(data+pos++);
|
|
uint8_t b2 = *(data+pos++);
|
|
uint8_t b3 = *(data+pos++);
|
|
|
|
size = b&0x3;
|
|
|
|
replSize = b3+5+((b&0xc)<<6);
|
|
replOfs = ((b&0x10)<<12)+1+(b1<<8)+b2;
|
|
}
|
|
if (pos+size >= len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
if (outPos+size > maxOut)
|
|
throw Exception{"Output buffer overrun"};
|
|
std::copy(data+pos, data+pos+size,
|
|
out+outPos);
|
|
pos += size;
|
|
outPos += size;
|
|
|
|
if (replOfs > outPos)
|
|
throw FormatException{"Replication offset exceeds buffer"};
|
|
if (outPos+replSize > maxOut)
|
|
throw Exception{"Output buffer overrun"};
|
|
unsigned start = outPos-replOfs;
|
|
for (unsigned i = 0;i < replSize;++i)
|
|
out[outPos++] = out[start+i];
|
|
} else {
|
|
unsigned size = (b&0x1f)*4+4;
|
|
if (size > 0x70) {
|
|
if (pos+(b&0x3) > len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
if (outPos+(b&0x03) > maxOut)
|
|
throw Exception{"Output buffer overrun"};
|
|
std::copy(data+pos, data+pos+(b&0x3),
|
|
out+outPos);
|
|
pos += (b&0x3);
|
|
outPos += (b&0x3);
|
|
#ifndef NDEBUG
|
|
if (pos < len)
|
|
printf("%lu unparsed bytes in compressed data\n", len-pos);
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
if (pos+size > len)
|
|
throw FormatException{"Compressed stream overrun"};
|
|
if (outPos+size > maxOut)
|
|
throw Exception{"Output buffer overrun"};
|
|
std::copy(data+pos, data+pos+size,
|
|
out+outPos);
|
|
pos += size;
|
|
outPos += size;
|
|
}
|
|
}
|
|
|
|
return outPos;
|
|
}
|
|
|
|
|
|
std::vector<uint8_t> decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint)
|
|
{
|
|
const unsigned DICT_OFS = 0x102;
|
|
BitReader bitReader(data, len);
|
|
std::vector<uint8_t> ret;
|
|
if (retlen_hint)
|
|
ret.reserve(retlen_hint);
|
|
|
|
std::vector<std::vector<uint8_t> > dict;
|
|
std::vector<uint8_t> cur_string, to_dict;
|
|
unsigned code_width = 9;
|
|
unsigned dict_head = DICT_OFS;
|
|
|
|
while (true) {
|
|
unsigned bits = bitReader.getBits(code_width);
|
|
#ifdef LZWDEBUG
|
|
printf("Code: %x\n", bits);
|
|
#endif
|
|
|
|
if (bits == 0x101) { // termination code
|
|
break;
|
|
} else if (bits == 0x100) { // Restart code
|
|
code_width = 9;
|
|
dict_head = DICT_OFS;
|
|
dict.clear();
|
|
cur_string.clear();
|
|
|
|
continue;
|
|
}
|
|
|
|
// Data
|
|
if (bits <= 0xff) {
|
|
#ifdef LZWDEBUG
|
|
printf("Verbatim: %x\n", bits);
|
|
#endif
|
|
if (!cur_string.empty()) {
|
|
to_dict = std::move(cur_string);
|
|
to_dict.push_back(bits);
|
|
}
|
|
cur_string = {static_cast<uint8_t>(bits)};
|
|
} else if (bits < dict_head) {
|
|
#ifdef LZWDEBUG
|
|
printf("Dict: %x: ", bits);
|
|
std::for_each(dict[bits-DICT_OFS].begin(), dict[bits-DICT_OFS].end(),
|
|
[](uint8_t const& d) { printf("%.2x ", d); });
|
|
printf("\n");
|
|
#endif
|
|
to_dict = std::move(cur_string);
|
|
to_dict.push_back(dict[bits-DICT_OFS][0]);
|
|
cur_string = dict[bits-DICT_OFS];
|
|
} else {
|
|
if (bits != dict_head)
|
|
throw Exception("WTF?! " + std::to_string(dict_head));
|
|
cur_string.push_back(cur_string[0]);
|
|
#ifdef LZWDEBUG
|
|
printf("Dict-unknown: %x: ", bits);
|
|
std::for_each(cur_string.begin(), cur_string.end(),
|
|
[](uint8_t const& d) { printf("%.2x ", d); });
|
|
printf("\n");
|
|
#endif
|
|
to_dict = cur_string;
|
|
}
|
|
|
|
std::copy(cur_string.begin(), cur_string.end(),
|
|
std::back_inserter(ret));
|
|
if (!to_dict.empty()) {
|
|
dict.emplace_back(std::move(to_dict));
|
|
++dict_head;
|
|
}
|
|
|
|
if (dict_head>>code_width) {
|
|
if (code_width < 12)
|
|
++code_width;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|