#include #include #include "common.hh" #include "compiler.hh" #include "util.hh" #include "decompress.hh" std::vector decompressLZ(uint8_t const* data, size_t len, size_t retlen_hint) { std::vector ret; if (retlen_hint) ret.reserve(retlen_hint); size_t pos = 0; while (pos < len) { uint8_t b = *(data+pos++); if (!((b&0xe0)==0xe0)) { unsigned size = 0, replSize = 0; unsigned replOfs = 0; if (!(b&0x80)) { if (pos >= len) throw FormatException{"Compressed stream overrun"}; uint8_t ofs = *(data+pos++); size = b&0x3; replSize = ((b&0x1c)>>2) + 3; replOfs = ((b&0x60)<<3)+ofs+1; } else if (!(b&0x40)) { if (pos+1 >= len) throw FormatException{"Compressed stream overrun"}; uint8_t b1 = *(data+pos++); uint8_t b2 = *(data+pos++); size = (b1&0xc0)>>6; replSize = (b&0x3f)+4; replOfs = ((b1&0x3f)<<8)+b2+1; } else if (!(b&0x20)) { if (pos+2 >= len) throw FormatException{"Compressed stream overrun"}; uint8_t b1 = *(data+pos++); uint8_t b2 = *(data+pos++); uint8_t b3 = *(data+pos++); size = b&0x3; replSize = b3+5+((b&0xc)<<6); replOfs = ((b&0x10)<<12)+1+(b1<<8)+b2; } if (pos+size >= len) throw FormatException{"Compressed stream overrun"}; std::copy(data+pos, data+pos+size, std::back_inserter(ret)); pos += size; if (replOfs > ret.size()) throw FormatException{"Replication offset exceeds buffer"}; unsigned start = ret.size()-replOfs; for (unsigned i = 0;i < replSize;++i) ret.push_back(ret[start+i]); } else { unsigned size = (b&0x1f)*4+4; if (size > 0x70) { if (pos+(b&0x3) > len) throw FormatException{"Compressed stream overrun"}; std::copy(data+pos, data+pos+(b&0x3), std::back_inserter(ret)); pos += (b&0x3); #ifndef NDEBUG if (pos < len) printf("%lu unparsed bytes in compressed data\n", len-pos); #endif break; } if (pos+size > len) throw FormatException{"Compressed stream overrun"}; std::copy(data+pos, data+pos+size, std::back_inserter(ret)); pos += size; } } return ret; } size_t decompressLZInto(uint8_t const* RESTRICT data, size_t len, uint8_t * RESTRICT out, size_t maxOut) { size_t pos = 0, outPos = 0; while (pos < len) { uint8_t b = *(data+pos++); if (!((b&0xe0)==0xe0)) { unsigned size = 0, replSize = 0; unsigned replOfs = 0; if (!(b&0x80)) { if (pos >= len) throw FormatException{"Compressed stream overrun"}; uint8_t ofs = *(data+pos++); size = b&0x3; replSize = ((b&0x1c)>>2) + 3; replOfs = ((b&0x60)<<3)+ofs+1; } else if (!(b&0x40)) { if (pos+1 >= len) throw FormatException{"Compressed stream overrun"}; uint8_t b1 = *(data+pos++); uint8_t b2 = *(data+pos++); size = (b1&0xc0)>>6; replSize = (b&0x3f)+4; replOfs = ((b1&0x3f)<<8)+b2+1; } else if (!(b&0x20)) { if (pos+2 >= len) throw FormatException{"Compressed stream overrun"}; uint8_t b1 = *(data+pos++); uint8_t b2 = *(data+pos++); uint8_t b3 = *(data+pos++); size = b&0x3; replSize = b3+5+((b&0xc)<<6); replOfs = ((b&0x10)<<12)+1+(b1<<8)+b2; } if (pos+size >= len) throw FormatException{"Compressed stream overrun"}; if (outPos+size > maxOut) throw Exception{"Output buffer overrun"}; std::copy(data+pos, data+pos+size, out+outPos); pos += size; outPos += size; if (replOfs > outPos) throw FormatException{"Replication offset exceeds buffer"}; if (outPos+replSize > maxOut) throw Exception{"Output buffer overrun"}; unsigned start = outPos-replOfs; for (unsigned i = 0;i < replSize;++i) out[outPos++] = out[start+i]; } else { unsigned size = (b&0x1f)*4+4; if (size > 0x70) { if (pos+(b&0x3) > len) throw FormatException{"Compressed stream overrun"}; if (outPos+(b&0x03) > maxOut) throw Exception{"Output buffer overrun"}; std::copy(data+pos, data+pos+(b&0x3), out+outPos); pos += (b&0x3); outPos += (b&0x3); #ifndef NDEBUG if (pos < len) printf("%lu unparsed bytes in compressed data\n", len-pos); #endif break; } if (pos+size > len) throw FormatException{"Compressed stream overrun"}; if (outPos+size > maxOut) throw Exception{"Output buffer overrun"}; std::copy(data+pos, data+pos+size, out+outPos); pos += size; outPos += size; } } return outPos; } std::vector decompressLZW(uint8_t const* data, size_t len, size_t retlen_hint) { const unsigned DICT_OFS = 0x102; BitReader bitReader(data, len); std::vector ret; if (retlen_hint) ret.reserve(retlen_hint); std::vector > dict; std::vector cur_string, to_dict; unsigned code_width = 9; unsigned dict_head = DICT_OFS; while (true) { unsigned bits = bitReader.getBits(code_width); #ifdef LZWDEBUG printf("Code: %x\n", bits); #endif if (bits == 0x101) { // termination code break; } else if (bits == 0x100) { // Restart code code_width = 9; dict_head = DICT_OFS; dict.clear(); cur_string.clear(); continue; } // Data if (bits <= 0xff) { #ifdef LZWDEBUG printf("Verbatim: %x\n", bits); #endif if (!cur_string.empty()) { to_dict = std::move(cur_string); to_dict.push_back(bits); } cur_string = {static_cast(bits)}; } else if (bits < dict_head) { #ifdef LZWDEBUG printf("Dict: %x: ", bits); std::for_each(dict[bits-DICT_OFS].begin(), dict[bits-DICT_OFS].end(), [](uint8_t const& d) { printf("%.2x ", d); }); printf("\n"); #endif to_dict = std::move(cur_string); to_dict.push_back(dict[bits-DICT_OFS][0]); cur_string = dict[bits-DICT_OFS]; } else { if (bits != dict_head) throw Exception("WTF?! " + std::to_string(dict_head)); cur_string.push_back(cur_string[0]); #ifdef LZWDEBUG printf("Dict-unknown: %x: ", bits); std::for_each(cur_string.begin(), cur_string.end(), [](uint8_t const& d) { printf("%.2x ", d); }); printf("\n"); #endif to_dict = cur_string; } std::copy(cur_string.begin(), cur_string.end(), std::back_inserter(ret)); if (!to_dict.empty()) { dict.emplace_back(std::move(to_dict)); ++dict_head; } if (dict_head>>code_width) { if (code_width < 12) ++code_width; } } return ret; }