Added BitReader to util; Implement LZW decompression
This commit is contained in:
@@ -118,9 +118,12 @@ IffFile::Form::Form(std::string type, uint8_t const* base, size_t length)
|
||||
if (length < 4)
|
||||
throw FormatException{"length < subtype id length"};
|
||||
|
||||
for (unsigned i = 0;i < 4;++i)
|
||||
for (unsigned i = 0;i < 4;++i) {
|
||||
if ((i > 0) || (base[i] == '\0'))
|
||||
break;
|
||||
if (!isprint(base[i]))
|
||||
throw FormatException{"Subtype not printable"};
|
||||
}
|
||||
subtype_ = std::string(reinterpret_cast<char const*>(base), 4);
|
||||
|
||||
size_t pos = 4;
|
||||
|
||||
@@ -157,30 +157,15 @@ size_t parseHuff_(uint8_t const* data, size_t len, OutputIt commands, size_t max
|
||||
printf("\n");
|
||||
#endif
|
||||
|
||||
int byteValid = 0;
|
||||
uint8_t byteBuf = 0;
|
||||
unsigned bytePos = 45;
|
||||
BitReader bitReader(data+45, len-45);
|
||||
unsigned huffIdx = huffTree.size();
|
||||
size_t count = 0;
|
||||
while (huffIdx != 22) {
|
||||
// Read next bit
|
||||
unsigned bit;
|
||||
if (byteValid) {
|
||||
bit = byteBuf&0x1;
|
||||
byteBuf >>= 1;
|
||||
--byteValid;
|
||||
} else {
|
||||
if (bytePos >= len)
|
||||
throw FormatException{"Huffman stream overrun"};
|
||||
byteBuf = data[bytePos++];
|
||||
bit = byteBuf&0x1;
|
||||
byteBuf >>= 1;
|
||||
byteValid = 7;
|
||||
}
|
||||
unsigned bit = bitReader.getBit();
|
||||
|
||||
huffIdx = huffTree.at(huffIdx-(bit?1:23));
|
||||
|
||||
|
||||
if (huffIdx < 22) {
|
||||
*commands++ = huffIdx;
|
||||
if (++count >= maxOut)
|
||||
|
||||
14
TreFile.cc
14
TreFile.cc
@@ -375,16 +375,22 @@ TreFile::Object TreFile::openIdx_(size_t table3Idx) const
|
||||
|
||||
if (flags&0x80) {
|
||||
if (flags&0x40) {
|
||||
auto dec = decompressLZ(base_+dataPtr, clength);
|
||||
auto dec = decompressLZ(base_+dataPtr, clength, length);
|
||||
#ifndef NDEBUG
|
||||
if (dec.size() != length)
|
||||
printf("WARNING: Decompressed size != expected (%lu, %u)\n", dec.size(), length);
|
||||
#endif
|
||||
return Object(std::move(dec));
|
||||
} else
|
||||
throw Exception{"Compression type 0 NYI"};
|
||||
} else {
|
||||
return Object(base_+dataPtr, length);
|
||||
auto dec = decompressLZW(base_+dataPtr, clength, length);
|
||||
#ifndef NDEBUG
|
||||
if (dec.size() != length)
|
||||
printf("WARNING: Decompressed size != expected (%lu, %u)\n", dec.size(), length);
|
||||
#endif
|
||||
return Object(std::move(dec));
|
||||
}
|
||||
} else {
|
||||
return Object(base_+dataPtr, clength);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
#include <unordered_map>
|
||||
#include <algorithm>
|
||||
|
||||
#include "common.hh"
|
||||
#include "compiler.hh"
|
||||
|
||||
#include "util.hh"
|
||||
#include "decompress.hh"
|
||||
|
||||
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len)
|
||||
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint)
|
||||
{
|
||||
std::vector<uint8_t> ret;
|
||||
if (retlen_hint)
|
||||
ret.reserve(retlen_hint);
|
||||
|
||||
size_t pos = 0;
|
||||
while (pos < len) {
|
||||
@@ -170,3 +175,83 @@ size_t decompressLZInto(uint8_t const* RESTRICT data, size_t len, uint8_t * REST
|
||||
|
||||
return outPos;
|
||||
}
|
||||
|
||||
|
||||
std::vector<uint8_t> decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint)
|
||||
{
|
||||
const unsigned DICT_OFS = 0x102;
|
||||
BitReader bitReader(data, len);
|
||||
std::vector<uint8_t> ret;
|
||||
if (retlen_hint)
|
||||
ret.reserve(retlen_hint);
|
||||
|
||||
std::vector<std::vector<uint8_t> > dict;
|
||||
std::vector<uint8_t> cur_string, to_dict;
|
||||
unsigned code_width = 9;
|
||||
unsigned dict_head = DICT_OFS;
|
||||
|
||||
while (true) {
|
||||
unsigned bits = bitReader.getBits(code_width);
|
||||
#ifdef LZWDEBUG
|
||||
printf("Code: %x\n", bits);
|
||||
#endif
|
||||
|
||||
if (bits == 0x101) { // termination code
|
||||
break;
|
||||
} else if (bits == 0x100) { // Restart code
|
||||
code_width = 9;
|
||||
dict_head = DICT_OFS;
|
||||
dict.clear();
|
||||
cur_string.clear();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Data
|
||||
if (bits <= 0xff) {
|
||||
#ifdef LZWDEBUG
|
||||
printf("Verbatim: %x\n", bits);
|
||||
#endif
|
||||
if (!cur_string.empty()) {
|
||||
to_dict = cur_string;
|
||||
to_dict.push_back(bits);
|
||||
}
|
||||
cur_string = {static_cast<uint8_t>(bits)};
|
||||
} else if (bits < dict_head) {
|
||||
#ifdef LZWDEBUG
|
||||
printf("Dict: %x: ", bits);
|
||||
std::for_each(dict[bits-DICT_OFS].begin(), dict[bits-DICT_OFS].end(),
|
||||
[](uint8_t const& d) { printf("%.2x ", d); });
|
||||
printf("\n");
|
||||
#endif
|
||||
to_dict = cur_string;
|
||||
to_dict.push_back(dict[bits-DICT_OFS][0]);
|
||||
cur_string = dict[bits-DICT_OFS];
|
||||
} else {
|
||||
if (bits != dict_head)
|
||||
throw Exception("WTF?! " + std::to_string(dict_head));
|
||||
cur_string.push_back(cur_string[0]);
|
||||
#ifdef LZWDEBUG
|
||||
printf("Dict-unknown: %x: ", bits);
|
||||
std::for_each(cur_string.begin(), cur_string.end(),
|
||||
[](uint8_t const& d) { printf("%.2x ", d); });
|
||||
printf("\n");
|
||||
#endif
|
||||
to_dict = cur_string;
|
||||
}
|
||||
|
||||
std::copy(cur_string.begin(), cur_string.end(),
|
||||
std::back_inserter(ret));
|
||||
if (!to_dict.empty()) {
|
||||
dict.emplace_back(std::move(to_dict));
|
||||
++dict_head;
|
||||
}
|
||||
|
||||
if (dict_head>>code_width) {
|
||||
if (code_width < 12)
|
||||
++code_width;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -11,9 +11,14 @@
|
||||
and/or replication of previously output data */
|
||||
|
||||
/* Decompress compressed data in 'data', return decompressed data */
|
||||
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len);
|
||||
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint = 0);
|
||||
/* Decompress compressed data in 'data' into pre-allocated buffer 'out' of size 'maxOut',
|
||||
return size of decompressed data */
|
||||
size_t decompressLZInto(uint8_t const* data, size_t len, uint8_t * out, size_t maxOut);
|
||||
|
||||
|
||||
/* LZW-type compression */
|
||||
|
||||
/* Decompress compressed data in 'data', return decompressed data */
|
||||
std::vector<uint8_t> decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint = 0);
|
||||
#endif
|
||||
|
||||
@@ -83,9 +83,6 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
for(auto crc : file.getCRCs()) {
|
||||
auto s = file.statCRC(crc);
|
||||
if ((s.flags&0xc0) == 0x80)
|
||||
continue;
|
||||
auto f = file.openCRC(crc);
|
||||
try {
|
||||
IffFile iff{f.data(), f.size()};
|
||||
|
||||
51
util.cc
51
util.cc
@@ -181,3 +181,54 @@ std::string fileToString(std::string const& name) {
|
||||
std::fclose(file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
BitReader::BitReader(uint8_t const* data, size_t len)
|
||||
: data_(data), len_(len), pos_(0), buf_(0), bufValid_(0)
|
||||
{
|
||||
}
|
||||
|
||||
unsigned BitReader::getBits(unsigned count) {
|
||||
unsigned out = buf_;
|
||||
unsigned bPos = bufValid_;
|
||||
|
||||
if (bPos >= count) {
|
||||
buf_ >>= count;
|
||||
bufValid_ -= count;
|
||||
return out & ((1<<count)-1);
|
||||
}
|
||||
|
||||
while (bPos < count) {
|
||||
if (pos_ >= len_)
|
||||
throw Exception{"Input stream overrun"};
|
||||
buf_ = data_[pos_++];
|
||||
|
||||
out |= buf_<<bPos;
|
||||
|
||||
if ((count-bPos) <= 8) {
|
||||
buf_ >>= (count-bPos);
|
||||
bufValid_ = 8-(count-bPos);
|
||||
|
||||
return out & ((1<<count)-1);
|
||||
} else
|
||||
bPos += 8;
|
||||
}
|
||||
|
||||
return out & ((1<<count)-1);
|
||||
}
|
||||
|
||||
unsigned BitReader::getBit() {
|
||||
if (bufValid_) {
|
||||
unsigned ret = buf_&0x1;
|
||||
buf_ >>= 1;
|
||||
--bufValid_;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (pos_ >= len_)
|
||||
throw Exception{"Input stream overrun"};
|
||||
buf_ = data_[pos_++];
|
||||
bufValid_ = 7;
|
||||
unsigned ret = buf_&0x1;
|
||||
buf_ >>= 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
14
util.hh
14
util.hh
@@ -44,4 +44,18 @@ int sextend(unsigned b, unsigned msb);
|
||||
|
||||
// Load simple resource from file
|
||||
std::string fileToString(std::string const& name);
|
||||
|
||||
// Sequential bit-level reader over a byte buffer. The buffer is referenced
// through a pointer, not copied.
class BitReader {
public:
    // Set up a reader over the 'len' bytes starting at 'data'.
    BitReader(uint8_t const* data, size_t len);

    // Read the next 'count' bits and return them as one value.
    unsigned getBits(unsigned count);

    // Read the next single bit.
    unsigned getBit();

private:
    uint8_t const* data_;  // start of the input bytes
    size_t len_;           // number of input bytes
    size_t pos_;           // index of the next byte to load
    uint8_t buf_;          // bits of the last loaded byte not yet handed out
    unsigned bufValid_;    // how many bits in buf_ are still unread
};
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user