From c87836468d88cb5a6a3587187e449680a4f0f541 Mon Sep 17 00:00:00 2001 From: Matthias Blankertz Date: Wed, 22 Apr 2015 17:18:43 +0200 Subject: [PATCH] iffexplore: Put IFF parsing logic in own class Created an IffFile class which contains the IFF parsing logic, changed iffexplore to use this class. --- IffFile.cc | 137 +++++++++++++++++++++++++++++ IffFile.hh | 200 +++++++++++++++++++++++++++++++++++++++++++ Makefile | 2 +- iffexplore.cc | 232 +++++++++++++++++++------------------------------- 4 files changed, 424 insertions(+), 147 deletions(-) create mode 100644 IffFile.cc create mode 100644 IffFile.hh diff --git a/IffFile.cc b/IffFile.cc new file mode 100644 index 0000000..eb5af64 --- /dev/null +++ b/IffFile.cc @@ -0,0 +1,137 @@ +#include + +#include "IffFile.hh" +#include "common.hh" + +struct ChunkHeader { + char typeID[4]; + uint32_t length; // big endian!! +} __attribute__((__packed__)); + +IffFile::IffFile(char const* base, size_t length) + : base_(base), length_(length), root_(nullptr) +{ + root_ = parseObject(base, length); + +#ifndef NDEBUG + if (length > (root_->getSize()+8)) { + printf("%zu excess bytes parsing IFF\n", length-(root_->getSize()+8)); // %zu: the argument is a size_t + } +#endif +} + +IffFile::~IffFile() +{ +} + +static void _printStructure(IffFile::Object const& obj, unsigned level) +{ + for (unsigned i = 0;i < level;++i) + putchar('\t'); + printf("%s Length %zu (0x%zx)", obj.getType().c_str(), obj.getSize(), obj.getSize()); // size_t needs %zu/%zx; the old "%.lx" (precision 0) printed nothing for a length of 0 + + if (obj.isForm()) { + auto const& form = dynamic_cast(obj); // bind by reference: plain 'auto' copy-constructs the Form, deep-copying every child + printf(", Subtype %s\n", form.getSubtype().c_str()); + for(auto it = form.childrenBegin();it != form.childrenEnd();++it) + _printStructure(*it, level+1); + } else { + try { + printf(" = \"%s\"\n", static_cast(obj).c_str()); + } catch(FormatException &ex) { + printf("\n"); + } + } +} + +void IffFile::printStructure(unsigned level) +{ + _printStructure(*root_, level); +} + + +std::unique_ptr IffFile::parseObject(char const* base, size_t length) +{ + // if (reinterpret_cast(base)%2 != 0) { + //
++base; + // --length; + // } + + if (length < sizeof(ChunkHeader)) + throw FormatException{"length < header size"}; + + ChunkHeader header; + memcpy(&header, base, sizeof(ChunkHeader)); + header.length = ntohl(header.length); + + for (unsigned i = 0;i < 4;++i) + if (!isprint(static_cast<unsigned char>(header.typeID[i]))) // isprint() is undefined for negative char values + throw FormatException{"Not an IFF chunk"}; + + if (header.length > length-7) + throw FormatException{"length < size in header"}; + if (header.length > length-sizeof(ChunkHeader)) --header.length; // a length one byte too large is tolerated above: clamp it so the payload ends inside the buffer + if(memcmp(header.typeID, "FORM", 4) == 0) + return std::make_unique
("FORM", base+8, static_cast(header.length)); + else + return std::make_unique(std::string(header.typeID, 4), base+8, static_cast(header.length)); +} + +IffFile::Object::Object(std::string type, char const* base, size_t length) + : base_(base), length_(length), type_(std::move(type)) +{ +} + +IffFile::Object::operator std::string() const +{ + // Check if BLOB is string + enum class State { STARTASCII, NULLS, ERROR }; + State state = State::STARTASCII; + for (char const& c : *this) { + switch(state) { + case State::STARTASCII: + if (isprint(static_cast<unsigned char>(c))) // isprint() is undefined for negative char values + continue; + if (c == '\0') + state = State::NULLS; + else + state = State::ERROR; + break; + case State::NULLS: + if (c != '\0') + state = State::ERROR; + break; + case State::ERROR: + break; + } + + if (state == State::ERROR) + break; + } + + if (state == State::NULLS) + return std::string(base_); + else if (state == State::STARTASCII) + return std::string(base_, length_); + else + throw FormatException{"BLOB not string"}; +} + +IffFile::Form::Form(std::string type, char const* base, size_t length) + : Object(std::move(type), base, length) +{ + if (length < 4) + throw FormatException{"length < subtype id length"}; + + subtype_ = std::string(base, 4); + + size_t pos = 4; + while (pos+8 <= length) { // '<=' so a trailing chunk with an empty payload (header only) is still parsed + children_.push_back(parseObject(base+pos, length-pos)); + pos += 8 + children_.back()->getSize(); + + if (pos%2 != 0) + ++pos; + } +} + diff --git a/IffFile.hh b/IffFile.hh new file mode 100644 index 0000000..46c092e --- /dev/null +++ b/IffFile.hh @@ -0,0 +1,200 @@ +#ifndef WC3RE_IFFFILE_HH__ +#define WC3RE_IFFFILE_HH__ + +#include +#include +#include +#include +#include + +class IffFile { +public: + IffFile(char const* base, size_t length); + + ~IffFile(); + + class Object { + public: + Object(std::string type, char const* base, size_t length); + Object(Object const& copy) + : base_(copy.base_), length_(copy.length_), type_(copy.type_) { + } + + virtual ~Object() { + } + + virtual Object* copy() const { + return new
Object(*this); + } + + std::string const& getType() const { + return type_; + } + + bool isForm() const { + return (typeid(*this) == typeid(Form)); + } + + size_t getSize() const { + return length_; + } + + char const* begin() const { + return base_; + } + + char const* end() const { + return base_+length_; + } + + operator std::string() const; + + protected: + char const* base_; + const size_t length_; + std::string const type_; + }; + + class Form final : public Object { + public: + Form(std::string type, char const* base, size_t length); + Form(Form const& copy) + : Object(copy), subtype_(copy.subtype_) { + for(auto& ent : copy.children_) { + children_.push_back(std::unique_ptr(ent->copy())); + } + } + + Form* copy() const override { + return new Form(*this); + } + + ~Form() {} + + std::string const& getSubtype() const { + return subtype_; + } + + size_t getChildCount() const { + return children_.size(); + } + + class ObjectIterator : public std::iterator { + public: + Object const& operator*() const { + return **implIt_; + } + + ObjectIterator& operator++() { + ++implIt_; + return *this; + } + + ObjectIterator& operator--() { + --implIt_; + return *this; + } + + bool operator!=(ObjectIterator const& other) const { + return (implIt_ != other.implIt_); + } + + bool operator>(ObjectIterator const& other) const { + return (implIt_ > other.implIt_); + } + + bool operator>=(ObjectIterator const& other) const { + return (implIt_ >= other.implIt_); + } + + bool operator<(ObjectIterator const& other) const { + return (implIt_ < other.implIt_); + } + + bool operator<=(ObjectIterator const& other) const { + return (implIt_ <= other.implIt_); + } + + Object const* operator->() const { + return implIt_->get(); + } + + ObjectIterator operator++(int) { + ObjectIterator ret = *this; + ++implIt_; + return ret; + } + + ObjectIterator operator--(int) { + ObjectIterator ret = *this; + --implIt_; + return ret; + } + + ObjectIterator& operator+=(ptrdiff_t n) { + implIt_ += n; + 
return *this; + } + + ObjectIterator operator+(ptrdiff_t n) const { + ObjectIterator ret = *this; + ret += n; + return ret; + } + + ObjectIterator& operator-=(ptrdiff_t n) { + implIt_ -= n; + return *this; + } + + ObjectIterator operator-(ptrdiff_t n) const { + ObjectIterator ret = *this; + ret -= n; + return ret; + } + + ptrdiff_t operator-(ObjectIterator const& other) const { + return implIt_ - other.implIt_; + } + + Object const& operator[](ptrdiff_t n) const { + return *(implIt_[n]); + } + + private: + ObjectIterator(std::vector >::const_iterator implIt) + : implIt_(std::move(implIt)) { + } + + friend class Form; + + std::vector >::const_iterator implIt_; + }; + + ObjectIterator childrenBegin() const { + return ObjectIterator(children_.cbegin()); + } + + ObjectIterator childrenEnd() const { + return ObjectIterator(children_.cend()); + } + private: + std::vector > children_; + std::string subtype_; + }; + + Object const& getRoot() { + return *root_; + } + + void printStructure(unsigned level = 0); + +private: + static std::unique_ptr parseObject(char const* base, size_t length); + + char const* base_; + const size_t length_; + std::unique_ptr root_; +}; + +#endif diff --git a/Makefile b/Makefile index e5ebddb..029d69e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ CXX=g++ CXXOPTS=-Og -ggdb -Wall -Wextra -pedantic -std=c++14 -flto LDOPTS= -IFFEXPLORE_CXXSRCS=iffexplore.cc +IFFEXPLORE_CXXSRCS=iffexplore.cc IffFile.cc IFFEXPLORE_OBJS=$(addprefix objs/,$(IFFEXPLORE_CXXSRCS:.cc=.o)) TREEXPLORE_CXXSRCS=treexplore.cc TreFile.cc diff --git a/iffexplore.cc b/iffexplore.cc index 689029f..e3c3011 100644 --- a/iffexplore.cc +++ b/iffexplore.cc @@ -8,176 +8,116 @@ #include #include #include +#include #include "common.hh" +#include "IffFile.hh" -struct ChunkHeader { - char typeID[4]; - uint32_t length; // big endian!! 
-} __attribute__((__packed__)); - -void parseBlob(FILE *iffFile, off_t start, off_t length, char name[4], unsigned level, bool dump = false) +void blobDump(IffFile::Object const& obj, std::string const& filename) { - static unsigned count = 0; - - if (fseeko(iffFile, start, SEEK_SET) != 0) { - throw POSIXException(errno, "Could not seek"); - } + FILEUPtr file{fopen(filename.c_str(), "wb")}; + if (!file) + throw POSIXException{errno, "Could not open file: " + filename}; - std::vector buf(length); - if (fread(buf.data(), length, 1, iffFile) != 1) - throw POSIXException{errno, "Could not read data"}; - - // Check if BLOB is string - { - enum class State { STARTASCII, NULLS, ERROR }; - State state = State::STARTASCII; - for (char const& c : buf) { - switch(state) { - case State::STARTASCII: - if (isprint(c)) - continue; - if (c == '\0') - state = State::NULLS; - else - state = State::ERROR; - break; - case State::NULLS: - if (c != '\0') - state = State::ERROR; - break; - case State::ERROR: - break; - } - - if (state == State::ERROR) - break; - } - if (state == State::NULLS) { // BLOB is zero(s)-terminated string - for (unsigned i = 0;i < level;++i) - putchar('\t'); - printf("= '%s'\n", buf.data()); - return; - } else if (state == State::STARTASCII) { // BLOB is string - buf.push_back('\0'); - for (unsigned i = 0;i < level;++i) - putchar('\t'); - printf("= '%s'\n", buf.data()); - return; - } - } - - // Nothing string-y, so just dump it to a file - std::string filename{"tmp/"}; - if (count < 10) - filename.append("0"); - filename.append(std::to_string(count) + "-"); - filename.append(name, 4); - - if (dump) { - FILEUPtr outFile{fopen(filename.c_str(), "wb")}; - if (!outFile) - throw POSIXException{errno, "Could not open " + filename}; - - for (unsigned i = 0;i < level;++i) - putchar('\t'); - printf("Dumping BLOB of length %ld (0x%lx) to %s\n", length, length, filename.c_str()); - - if (fwrite(buf.data(), length, 1, outFile.get()) != 1) - throw POSIXException{errno, 
"Could not write data"}; - } else { - for (unsigned i = 0;i < level;++i) - putchar('\t'); - printf("BLOB of length %ld (0x%lx)\n", length, length); - } - - ++count; + if (fwrite(obj.begin(), obj.getSize(), 1, file.get()) != 1) + throw POSIXException{errno, "Could not write"}; } -void parseChunk(FILE* iffFile, off_t start, off_t length, unsigned level = 0) + +void iffDumper(IffFile::Object const& obj, bool dumpBlobs, std::string const& dumpPath, unsigned& blobCount, unsigned level = 0) { - off_t pos = 0; + for (unsigned i = 0;i < level;++i) + putchar('\t'); + printf("%s Length %zu (0x%zx)", obj.getType().c_str(), obj.getSize(), obj.getSize()); // size_t needs %zu/%zx; the old "%.lx" (precision 0) printed nothing for a length of 0 - while (pos < length) { - if (fseeko(iffFile, pos+start, SEEK_SET) != 0) { - throw POSIXException(errno, "Could not seek"); + if (obj.isForm()) { + auto const& form = dynamic_cast(obj); // bind by reference: plain 'auto' copy-constructs the Form, deep-copying every child + printf(", Subtype %s\n", form.getSubtype().c_str()); + for(auto it = form.childrenBegin();it != form.childrenEnd();++it) + iffDumper(*it, dumpBlobs, dumpPath, blobCount, level+1); + } else { + try { + printf(" = \"%s\"\n", static_cast(obj).c_str()); + } catch(FormatException &ex) { + if (dumpBlobs) { + std::string filename{dumpPath}; + filename += obj.getType() + "-"s + std::to_string(blobCount++); + printf(" dump to %s\n", filename.c_str()); + blobDump(obj, filename); + } else + printf("\n"); } - - // pos < length due to loop condition, cast of difference to unsigned is OK - if (static_cast(length-pos) < sizeof(ChunkHeader)) - return; - //throw FormatException{"Remaining size < header size"}; - - ChunkHeader header; - - if (fread(&header, sizeof(ChunkHeader), 1, iffFile) != 1) { - if (feof(iffFile)) - return; - throw POSIXException(errno, "Could not read header"); - } - - if (!isalnum(header.typeID[0])) { - ++pos; - continue; - } - - header.length = ntohl(header.length); - for (unsigned i = 0;i < level;++i) - putchar('\t'); - printf("Type: %.4s, Length: %u (0x%x)", header.typeID, header.length, header.length); - if (header.length > (length-(pos+8))) { - 
if (header.length == (length-(pos+8))+1) { - --header.length; // Fix off-by-one errors - } else { - putchar('\n'); - throw FormatException{"Length in header > remaining parent size"}; - } - } - - if (memcmp(header.typeID, "FORM", 4) == 0) { - char subType[4]; - if (fread(&subType, 4, 1, iffFile) != 1) { - putchar('\n'); - throw POSIXException(errno, "Could not read form subtype"); - } - printf(", SubType: %.4s\n", subType); - - try { - parseChunk(iffFile, start+pos+12, header.length-4, level+1); - } catch (FormatException &ex) { - for (unsigned i = 0;i < level;++i) - putchar('\t'); - printf("Error parsing sub-chunk: %s\n", ex.toString().c_str()); - } - } else { - putchar('\n'); - parseBlob(iffFile, start+pos+8, header.length, header.typeID, level+1); - } - - pos += header.length+8; } } +void usage(char *argv0) { + fprintf(stderr, "Usage: %s [-sh] [-d dest] iff-file\n", argv0); + fprintf(stderr, "\t-s\tPrint the iff-file's structure\n"); + fprintf(stderr, "\t-d dest\tDump BLOBs as files to dest/\n"); + fprintf(stderr, "\t-h\tPrint this help\n"); +} + int main(int argc, char *argv[]) { - if (argc != 2) { - fprintf(stderr, "Usage: %s iff-file\n", argv[0]); - return 1; - } + bool printStructure = false, dumpBlobs = false; + std::string dumpPath, iffFile; + { + int opt; + while ((opt = getopt(argc, argv, "hsd:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + return 0; + case 's': + printStructure = true; + break; + case 'd': + dumpPath = optarg; + dumpBlobs = true; + break; + default: + usage(argv[0]); + return 1; + } + } + if (optind >= argc) { + usage(argv[0]); + return 1; + } + iffFile = argv[optind]; + } + try { - FILEUPtr iffFile{fopen(argv[1], "rb")}; - if (!iffFile) { - throw POSIXException{errno, "Could not open "s + argv[1]}; + FILEUPtr iffFD{fopen(iffFile.c_str(), "rb")}; + if (!iffFD) { + throw POSIXException{errno, "Could not open "s + iffFile}; } struct stat statBuf; - if (fstat(fileno(iffFile.get()), &statBuf) != 0) { + if 
(fstat(fileno(iffFD.get()), &statBuf) != 0) { throw POSIXException(errno, "Could not stat"); } + + char *mmapBase = static_cast(mmap(nullptr, statBuf.st_size, PROT_READ, MAP_SHARED, fileno(iffFD.get()), 0)); + if (mmapBase == MAP_FAILED) // mmap() signals failure with MAP_FAILED, not with a null pointer + throw POSIXException(errno, "mmap failed"); - parseChunk(iffFile.get(), 0, statBuf.st_size); + try { + IffFile iff{mmapBase, static_cast(statBuf.st_size)}; + + if (printStructure) { + unsigned blobCount = 0; + iffDumper(iff.getRoot(), dumpBlobs, dumpPath, blobCount); + } + } catch(...) { + munmap(mmapBase, statBuf.st_size); + throw; + } + + if (munmap(mmapBase, statBuf.st_size) != 0) + fprintf(stderr, "Warning: munmap failed: %s\n", strerror(errno)); } catch (POSIXException &ex) { fflush(stdout);