From 1221f19b67b542d9faf433322f1327de107c01c7 Mon Sep 17 00:00:00 2001 From: Matthias Blankertz Date: Sun, 17 May 2015 20:18:26 +0200 Subject: [PATCH] Added BitReader to util; Implement LZW decompression --- IffFile.cc | 5 ++- MveDecoder.cc | 19 ++--------- TreFile.cc | 14 +++++--- decompress.cc | 89 +++++++++++++++++++++++++++++++++++++++++++++++++-- decompress.hh | 7 +++- treexplore.cc | 3 -- util.cc | 51 +++++++++++++++++++++++++++++ util.hh | 14 ++++++++ 8 files changed, 174 insertions(+), 28 deletions(-) diff --git a/IffFile.cc b/IffFile.cc index c6b0401..490b2b8 100644 --- a/IffFile.cc +++ b/IffFile.cc @@ -118,9 +118,12 @@ IffFile::Form::Form(std::string type, uint8_t const* base, size_t length) if (length < 4) throw FormatException{"length < subtype id length"}; - for (unsigned i = 0;i < 4;++i) + for (unsigned i = 0;i < 4;++i) { + if ((i > 0) || (base[i] == '\0')) + break; if (!isprint(base[i])) throw FormatException{"Subtype not printable"}; + } subtype_ = std::string(reinterpret_cast(base), 4); size_t pos = 4; diff --git a/MveDecoder.cc b/MveDecoder.cc index a28b944..c314a02 100644 --- a/MveDecoder.cc +++ b/MveDecoder.cc @@ -157,30 +157,15 @@ size_t parseHuff_(uint8_t const* data, size_t len, OutputIt commands, size_t max printf("\n"); #endif - int byteValid = 0; - uint8_t byteBuf = 0; - unsigned bytePos = 45; + BitReader bitReader(data+45, len-45); unsigned huffIdx = huffTree.size(); size_t count = 0; while (huffIdx != 22) { // Read next bit - unsigned bit; - if (byteValid) { - bit = byteBuf&0x1; - byteBuf >>= 1; - --byteValid; - } else { - if (bytePos >= len) - throw FormatException{"Huffman stream overrun"}; - byteBuf = data[bytePos++]; - bit = byteBuf&0x1; - byteBuf >>= 1; - byteValid = 7; - } + unsigned bit = bitReader.getBit(); huffIdx = huffTree.at(huffIdx-(bit?1:23)); - if (huffIdx < 22) { *commands++ = huffIdx; if (++count >= maxOut) diff --git a/TreFile.cc b/TreFile.cc index a9c8a4b..4d8e2dc 100644 --- a/TreFile.cc +++ b/TreFile.cc @@ -375,16 +375,22 @@ TreFile::Object TreFile::openIdx_(size_t table3Idx) const if (flags&0x80) { if (flags&0x40) { - auto dec = decompressLZ(base_+dataPtr, clength); + auto dec = decompressLZ(base_+dataPtr, clength, length); #ifndef NDEBUG if (dec.size() != length) printf("WARNING: Decompressed size != expected (%lu, %u)\n", dec.size(), length); #endif return Object(std::move(dec)); - } else - throw Exception{"Compression type 0 NYI"}; + } else { + auto dec = decompressLZW(base_+dataPtr, clength, length); +#ifndef NDEBUG + if (dec.size() != length) + printf("WARNING: Decompressed size != expected (%lu, %u)\n", dec.size(), length); +#endif + return Object(std::move(dec)); + } } else { - return Object(base_+dataPtr, length); + return Object(base_+dataPtr, clength); } } diff --git a/decompress.cc b/decompress.cc index 284849e..163949f 100644 --- a/decompress.cc +++ b/decompress.cc @@ -1,11 +1,16 @@ +#include +#include + #include "common.hh" #include "compiler.hh" - +#include "util.hh" #include "decompress.hh" -std::vector decompressLZ(uint8_t const* data, size_t len) +std::vector decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint) { std::vector ret; + if (retlen_hint) + ret.reserve(retlen_hint); size_t pos = 0; while (pos < len) { @@ -170,3 +175,83 @@ size_t decompressLZInto(uint8_t const* RESTRICT data, size_t len, uint8_t * REST return outPos; } + + +std::vector decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint) +{ + const unsigned DICT_OFS = 0x102; + BitReader bitReader(data, len); + std::vector ret; + if (retlen_hint) + ret.reserve(retlen_hint); + + std::vector > dict; + std::vector cur_string, to_dict; + unsigned code_width = 9; + unsigned dict_head = DICT_OFS; + + while (true) { + unsigned bits = bitReader.getBits(code_width); +#ifdef LZWDEBUG + printf("Code: %x\n", bits); +#endif + + if (bits == 0x101) { // termination code + break; + } else if (bits == 0x100) { // Restart code + code_width = 9; + dict_head = DICT_OFS; + dict.clear(); + cur_string.clear(); + + continue; + } + + // Data + if (bits <= 0xff) { +#ifdef LZWDEBUG + printf("Verbatim: %x\n", bits); +#endif + if (!cur_string.empty()) { + to_dict = cur_string; + to_dict.push_back(bits); + } + cur_string = {static_cast(bits)}; + } else if (bits < dict_head) { +#ifdef LZWDEBUG + printf("Dict: %x: ", bits); + std::for_each(dict[bits-DICT_OFS].begin(), dict[bits-DICT_OFS].end(), + [](uint8_t const& d) { printf("%.2x ", d); }); + printf("\n"); +#endif + to_dict = cur_string; + to_dict.push_back(dict[bits-DICT_OFS][0]); + cur_string = dict[bits-DICT_OFS]; + } else { + if (bits != dict_head) + throw Exception("WTF?! " + std::to_string(dict_head)); + cur_string.push_back(cur_string[0]); +#ifdef LZWDEBUG + printf("Dict-unknown: %x: ", bits); + std::for_each(cur_string.begin(), cur_string.end(), + [](uint8_t const& d) { printf("%.2x ", d); }); + printf("\n"); +#endif + to_dict = cur_string; + } + + std::copy(cur_string.begin(), cur_string.end(), + std::back_inserter(ret)); + if (!to_dict.empty()) { + dict.emplace_back(std::move(to_dict)); + ++dict_head; + } + + if (dict_head>>code_width) { + if (code_width < 12) + ++code_width; + } + } + + return ret; +} diff --git a/decompress.hh b/decompress.hh index fd64b17..5385513 100644 --- a/decompress.hh +++ b/decompress.hh @@ -11,9 +11,14 @@ and/or replication of previously output data */ /* Decompress compressed data in 'data', return decompressed data */ -std::vector decompressLZ(uint8_t const* data, size_t len); +std::vector decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint = 0); /* Decompress compressed data in 'data' into pre-allocated buffer 'out' of size 'maxOut', return size of decompressed data */ size_t decompressLZInto(uint8_t const* data, size_t len, uint8_t * out, size_t maxOut); + +/* LZW-type compression */ + +/* Decompress compressed data in 'data', return decompressed data */ +std::vector decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint = 0); #endif diff --git a/treexplore.cc b/treexplore.cc index 5489864..feaa0ec 100644 --- a/treexplore.cc +++ b/treexplore.cc @@ -83,9 +83,6 @@ int main(int argc, char *argv[]) { } for(auto crc : file.getCRCs()) { - auto s = file.statCRC(crc); - if ((s.flags&0xc0) == 0x80) - continue; auto f = file.openCRC(crc); try { IffFile iff{f.data(), f.size()}; diff --git a/util.cc b/util.cc index cb6bf88..cd82deb 100644 --- a/util.cc +++ b/util.cc @@ -181,3 +181,54 @@ std::string fileToString(std::string const& name) { std::fclose(file); return ret; } + +BitReader::BitReader(uint8_t const* data, size_t len) + : data_(data), len_(len), pos_(0), buf_(0), bufValid_(0) +{ +} + +unsigned BitReader::getBits(unsigned count) { + unsigned out = buf_; + unsigned bPos = bufValid_; + + if (bPos >= count) { + buf_ >>= count; + bufValid_ -= count; + return out & ((1<= len_) + throw Exception{"Input stream overrun"}; + buf_ = data_[pos_++]; + + out |= buf_<>= (count-bPos); + bufValid_ = 8-(count-bPos); + + return out & ((1<>= 1; + --bufValid_; + return ret; + } + + if (pos_ >= len_) + throw Exception{"Input stream overrun"}; + buf_ = data_[pos_++]; + bufValid_ = 7; + unsigned ret = buf_&0x1; + buf_ >>= 1; + return ret; +} diff --git a/util.hh b/util.hh index 2c9b184..67d8232 100644 --- a/util.hh +++ b/util.hh @@ -44,4 +44,18 @@ int sextend(unsigned b, unsigned msb); // Load simple resource from file std::string fileToString(std::string const& name); + +class BitReader { +public: + BitReader(uint8_t const* data, size_t len); + + unsigned getBits(unsigned count); + unsigned getBit(); + +private: + uint8_t const* data_; + size_t len_, pos_; + uint8_t buf_; + unsigned bufValid_; +}; #endif