Added BitReader to util; Implement LZW decompression

This commit is contained in:
2015-05-17 20:18:26 +02:00
parent d8a96ee73f
commit 1221f19b67
8 changed files with 174 additions and 28 deletions

View File

@@ -118,9 +118,12 @@ IffFile::Form::Form(std::string type, uint8_t const* base, size_t length)
if (length < 4)
throw FormatException{"length < subtype id length"};
for (unsigned i = 0;i < 4;++i)
for (unsigned i = 0;i < 4;++i) {
if ((i > 0) || (base[i] == '\0'))
break;
if (!isprint(base[i]))
throw FormatException{"Subtype not printable"};
}
subtype_ = std::string(reinterpret_cast<char const*>(base), 4);
size_t pos = 4;

View File

@@ -157,30 +157,15 @@ size_t parseHuff_(uint8_t const* data, size_t len, OutputIt commands, size_t max
printf("\n");
#endif
int byteValid = 0;
uint8_t byteBuf = 0;
unsigned bytePos = 45;
BitReader bitReader(data+45, len-45);
unsigned huffIdx = huffTree.size();
size_t count = 0;
while (huffIdx != 22) {
// Read next bit
unsigned bit;
if (byteValid) {
bit = byteBuf&0x1;
byteBuf >>= 1;
--byteValid;
} else {
if (bytePos >= len)
throw FormatException{"Huffman stream overrun"};
byteBuf = data[bytePos++];
bit = byteBuf&0x1;
byteBuf >>= 1;
byteValid = 7;
}
unsigned bit = bitReader.getBit();
huffIdx = huffTree.at(huffIdx-(bit?1:23));
if (huffIdx < 22) {
*commands++ = huffIdx;
if (++count >= maxOut)

View File

@@ -375,16 +375,22 @@ TreFile::Object TreFile::openIdx_(size_t table3Idx) const
if (flags&0x80) {
if (flags&0x40) {
auto dec = decompressLZ(base_+dataPtr, clength);
auto dec = decompressLZ(base_+dataPtr, clength, length);
#ifndef NDEBUG
if (dec.size() != length)
printf("WARNING: Decompressed size != expected (%lu, %u)\n", dec.size(), length);
#endif
return Object(std::move(dec));
} else
throw Exception{"Compression type 0 NYI"};
} else {
return Object(base_+dataPtr, length);
auto dec = decompressLZW(base_+dataPtr, clength, length);
#ifndef NDEBUG
if (dec.size() != length)
printf("WARNING: Decompressed size != expected (%lu, %u)\n", dec.size(), length);
#endif
return Object(std::move(dec));
}
} else {
return Object(base_+dataPtr, clength);
}
}

View File

@@ -1,11 +1,16 @@
#include <unordered_map>
#include <algorithm>
#include "common.hh"
#include "compiler.hh"
#include "util.hh"
#include "decompress.hh"
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len)
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint)
{
std::vector<uint8_t> ret;
if (retlen_hint)
ret.reserve(retlen_hint);
size_t pos = 0;
while (pos < len) {
@@ -170,3 +175,83 @@ size_t decompressLZInto(uint8_t const* RESTRICT data, size_t len, uint8_t * REST
return outPos;
}
/* Decompress an LZW-type stream.
 *
 * Codes are read from 'data' through a BitReader, starting at 9 bits wide and
 * widening by one bit (up to 12) whenever the next free dictionary code no
 * longer fits the current width.
 *   0x000-0x0ff  literal byte
 *   0x100        restart: forget the dictionary, go back to 9-bit codes
 *   0x101        end of stream
 *   0x102-...    dictionary entries, in the order they were created
 *
 * data, len    compressed input
 * retlen_hint  expected output size; only used to pre-reserve the result
 *
 * Returns the decompressed bytes.
 * Throws Exception when a code refers to a dictionary entry that cannot exist
 * yet; BitReader throws Exception when the input ends before the end code.
 */
std::vector<uint8_t> decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint)
{
    const unsigned CODE_RESTART = 0x100;
    const unsigned CODE_END = 0x101;
    const unsigned DICT_OFS = 0x102;
    const unsigned MIN_CODE_WIDTH = 9;
    const unsigned MAX_CODE_WIDTH = 12;
    // One past the largest code a MAX_CODE_WIDTH-bit read can produce
    const unsigned DICT_LIMIT = 1u << MAX_CODE_WIDTH;

    BitReader bitReader(data, len);

    std::vector<uint8_t> ret;
    if (retlen_hint)
        ret.reserve(retlen_hint);

    std::vector<std::vector<uint8_t> > dict;
    std::vector<uint8_t> cur_string, to_dict;
    unsigned code_width = MIN_CODE_WIDTH;
    unsigned dict_head = DICT_OFS; // next dictionary code to be assigned

    while (true) {
        const unsigned bits = bitReader.getBits(code_width);
#ifdef LZWDEBUG
        printf("Code: %x\n", bits);
#endif

        if (bits == CODE_END)
            break;

        if (bits == CODE_RESTART) {
            code_width = MIN_CODE_WIDTH;
            dict_head = DICT_OFS;
            dict.clear();
            cur_string.clear();
            continue;
        }

        // Start every data code with an explicitly empty candidate instead of
        // relying on the state a moved-from vector happens to be left in.
        to_dict.clear();

        if (bits <= 0xff) {
            // Literal byte
#ifdef LZWDEBUG
            printf("Verbatim: %x\n", bits);
#endif
            if (!cur_string.empty()) {
                to_dict = cur_string;
                to_dict.push_back(static_cast<uint8_t>(bits));
            }
            cur_string.assign(1, static_cast<uint8_t>(bits));
        } else if (bits < dict_head) {
            // Known dictionary entry
            std::vector<uint8_t> const& entry = dict[bits - DICT_OFS];
#ifdef LZWDEBUG
            printf("Dict: %x: ", bits);
            for (uint8_t d : entry)
                printf("%.2x ", d);
            printf("\n");
#endif
            to_dict = cur_string;
            to_dict.push_back(entry[0]);
            cur_string = entry;
        } else {
            // Only the entry about to be created may be referenced early; it
            // is the previous string extended by its own first byte.
            if (bits != dict_head)
                throw Exception("WTF?! " + std::to_string(dict_head));
            // Without a previous string there is nothing to extend
            // (malformed input); indexing cur_string here would be undefined.
            if (cur_string.empty())
                throw Exception("LZW: dictionary code without preceding data");
            const uint8_t first = cur_string[0];
            cur_string.push_back(first);
#ifdef LZWDEBUG
            printf("Dict-unknown: %x: ", bits);
            for (uint8_t d : cur_string)
                printf("%.2x ", d);
            printf("\n");
#endif
            to_dict = cur_string;
        }

        ret.insert(ret.end(), cur_string.begin(), cur_string.end());

        // Entries at or beyond DICT_LIMIT could never be addressed by a
        // MAX_CODE_WIDTH-bit code, so stop storing them once the table is full.
        if (!to_dict.empty() && dict_head < DICT_LIMIT) {
            dict.emplace_back(std::move(to_dict));
            ++dict_head;
        }

        // Widen the codes once the next free code no longer fits
        if ((dict_head >> code_width) && code_width < MAX_CODE_WIDTH)
            ++code_width;
    }

    return ret;
}

View File

@@ -11,9 +11,14 @@
and/or replication of previously output data */
/* Decompress compressed data in 'data', return decompressed data */
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len);
std::vector<uint8_t> decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint = 0);
/* Decompress compressed data in 'data' into pre-allocated buffer 'out' of size 'maxOut',
return size of decompressed data */
size_t decompressLZInto(uint8_t const* data, size_t len, uint8_t * out, size_t maxOut);
/* LZW-type compression */
/* Decompress compressed data in 'data', return decompressed data */
std::vector<uint8_t> decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint = 0);
#endif

View File

@@ -83,9 +83,6 @@ int main(int argc, char *argv[]) {
}
for(auto crc : file.getCRCs()) {
auto s = file.statCRC(crc);
if ((s.flags&0xc0) == 0x80)
continue;
auto f = file.openCRC(crc);
try {
IffFile iff{f.data(), f.size()};

51
util.cc
View File

@@ -181,3 +181,54 @@ std::string fileToString(std::string const& name) {
std::fclose(file);
return ret;
}
BitReader::BitReader(uint8_t const* data, size_t len)
: data_(data), len_(len), pos_(0), buf_(0), bufValid_(0)
{
}
/* Read the next 'count' bits from the stream and return them as an integer.
 *
 * Bits are consumed least-significant-bit first within each byte, and the
 * first bit read ends up in bit 0 of the result. Bits of the last byte touched
 * that were not needed stay buffered for the next call.
 *
 * Throws Exception if 'count' is wider than an unsigned, or if the input runs
 * out before 'count' bits could be read.
 */
unsigned BitReader::getBits(unsigned count) {
    constexpr unsigned kMaxBits = sizeof(unsigned) * 8;
    if (count > kMaxBits)
        throw Exception{"Bit count too large"};

    // Build the mask in unsigned arithmetic; shifting by the full width of the
    // type (or a signed 1 into the sign bit) would be undefined.
    const unsigned mask = (count == kMaxBits) ? ~0u : ((1u << count) - 1u);

    // Enough bits are already buffered: serve the request from buf_ alone.
    if (bufValid_ >= count) {
        const unsigned out = buf_ & mask;
        buf_ >>= count;
        bufValid_ -= count;
        return out;
    }

    unsigned out = buf_;       // leftover bits form the low end of the result
    unsigned have = bufValid_; // number of result bits collected so far
    while (true) {
        if (pos_ >= len_)
            throw Exception{"Input stream overrun"};
        const unsigned byte = data_[pos_++];
        out |= byte << have;

        const unsigned need = count - have;
        if (need <= 8) {
            // This byte completes the request; keep its unused high bits.
            buf_ = static_cast<uint8_t>(byte >> need);
            bufValid_ = 8 - need;
            return out & mask;
        }
        have += 8;
    }
}
/* Read a single bit (the lowest unread bit of the current byte).
   Throws Exception when no input is left. */
unsigned BitReader::getBit() {
    // Refill the one-byte buffer when it has been used up
    if (bufValid_ == 0) {
        if (pos_ >= len_)
            throw Exception{"Input stream overrun"};
        buf_ = data_[pos_++];
        bufValid_ = 8;
    }

    const unsigned bit = buf_ & 0x1;
    buf_ >>= 1;
    --bufValid_;
    return bit;
}

14
util.hh
View File

@@ -44,4 +44,18 @@ int sextend(unsigned b, unsigned msb);
// Load simple resource from file
std::string fileToString(std::string const& name);
/* Reads a byte buffer as a stream of bits, taking the least significant bit
   of each byte first. The buffer is referenced, not copied. */
class BitReader {
public:
    /* Read from the 'len' bytes starting at 'data' */
    BitReader(uint8_t const* data, size_t len);

    /* Return the next 'count' bits, first bit read in bit 0;
       throws Exception when the input is exhausted */
    unsigned getBits(unsigned count);

    /* Return the next single bit; throws Exception when the input is exhausted */
    unsigned getBit();

private:
    uint8_t const* data_; // start of the input
    size_t len_;          // number of input bytes
    size_t pos_;          // index of the next byte to load
    uint8_t buf_;         // not yet consumed bits of the last loaded byte
    unsigned bufValid_;   // how many bits of buf_ are still unread
};
#endif