Reputation: 1294
I have a chunk of data which is supposed to be zlib compressed data (I was not 100% sure).
I first tried to uncompress it with gzip by prepending "1F 8B 08 00 00 00 00 00", just like in the accepted answer of this thread (https://unix.stackexchange.com/questions/22834/how-to-uncompress-zlib-data-in-unix). It worked, and it was probably the right approach, because the output contained a lot of human-readable strings.
I then tried to implement this in a C++ program using zlib, but zlib seems to generate different output. Am I missing something? zlib and gzip should be basically the same (apart from the headers and trailers), shouldn't they? Or do I have a simple error in my code below? (The chunk of data is shortened for the sake of simplicity.)
// Caller-side snippet: dump the compressed input, inflate it, then dump the result.
// NOTE(review): `decompressed` is declared without an initializer, and inflateZlib
// (as defined below) takes the pointer by value, so nothing in this snippet ever
// makes it point at the inflated data.
unsigned char* decompressed;
unsigned char* dataChunk = /*...*/;
printHex(dataChunk, 160);
int error = inflateZlib(dataChunk, 160, decompressed, 1000);
// Always dumps 160 bytes, independent of how many bytes inflate actually produced.
printHex(decompressed, 160);
//zerr(error);
// Writes the first n bytes of data to std::cout as space-separated hexadecimal
// values (no zero padding, so 0x0f prints as "f"), followed by a separator line
// consisting of a single "-".
void printHex(unsigned char* data, size_t n)
{
    for(size_t i = 0; i < n; i++)
    {
        // Widen before streaming: an unsigned char would be written as a raw
        // character instead of a number.
        std::cout << std::hex << static_cast<uint16_t>(data[i]) << " ";
    }
    // Switch the stream back to decimal so later output is not printed in hex.
    std::cout << std::dec << "\n-\n";
}
// Runs one zlib inflate() call over `length` bytes starting at `data`, with room
// for up to `maxDecompressed` output bytes, and returns the status code of that
// inflate() call.
// NOTE(review): the output buffer is allocated inside this function and is not
// handed back to the caller (see the note on the first statement).
int inflateZlib(unsigned char* data, size_t length, unsigned char* decompressed, size_t maxDecompressed)
{
// NOTE(review): `decompressed` is a by-value parameter, so this assignment only
// changes the local copy. The caller's pointer is left untouched, and the buffer
// allocated here is never deleted in this function.
decompressed = new unsigned char[maxDecompressed];
z_stream infstream;
// Z_NULL for zalloc/zfree/opaque — presumably selects zlib's default allocator;
// confirm against the zlib manual.
infstream.zalloc = Z_NULL;
infstream.zfree = Z_NULL;
infstream.opaque = Z_NULL;
infstream.avail_in = (uInt)(length); // size of input
infstream.next_in = (Bytef *)data; // input char array
infstream.avail_out = (uInt)maxDecompressed; // size of output
infstream.next_out = (Bytef *)decompressed; // output char array
// the actual DE-compression work.
int ret = inflateInit(&infstream);
// zerr is not defined in the visible source; presumably it reports the zlib
// status code — confirm. inflate() below runs even if inflateInit failed.
zerr(ret);
ret = inflate(&infstream, Z_NO_FLUSH);
zerr(ret);
inflateEnd(&infstream);
// Only the inflate() status is returned; the number of bytes produced is not.
return ret;
}
This produces the following output:
78 9c bd 58 4b 88 23 45 18 ee 3c 67 e3 24 93 cc ae 8a f8 42 10 c4 cb 1a 33 a3 7b f0 60 e6 e0 e6 e0 49 90 bd 29 4d 4d 77 25 dd 99 ee ea de aa ee 4c 32 82 2c e8 c1 93 ac 47 c5 45 f 82 8 5e 16 f ba 78 18 45 d0 83 7 95 15 5c d0 c3 aa b0 b2 ee 65 5c f0 e4 c5 bf aa 1f a9 ea 74 cf 64 7 31 c3 24 9d fa fe bf ea ab ff 59 15 ab 62 6a b5 5d 9b 8c 18 2a 5b 15 47 d3 b4 92 55 35 b5 ba b7 3d c6 46 b0 a3 35 3 1c 50 64 61 93 7a a4 67 d5 0 e1 c2 d8 e4 92 75 fe 56 b3 ca a6 76 c2 f0 1c 8f
-
0 0 6 c0 83 50 0 0 16 b0 78 9c bd 58 4b 88 23 45 18 ee 3c 67 e3 24 93 cc ae 8a f8 42 10 c4 cb 1a 33 a3 7b f0 60 e6 e0 e6 e0 49 90 bd 29 4d 4d 77 25 dd 99 ee ea de aa ee 4c 32 82 2c e8 c1 93 ac 47 c5 45 f 82 8 5e 16 f ba 78 18 45 d0 83 7 95 15 5c d0 c3 aa b0 b2 ee 65 5c f0 e4 c5 bf aa 1f a9 ea 74 cf 64 7 31 c3 24 9d fa fe bf ea ab ff 59 15 ab 62 6a b5 5d 9b 8c 18 2a 5b 15 47 d3 b4 92 55 35 b5 ba b7 3d c6 46 b0 a3 35 3 1c 50 64 61 93 7a a4 67 d5 0 e1 c2 d8 e4 92 75
-
which is not what I want. Whereas gzip:
printf "\x1f\x8b\x08\x00\x00\x00\x00\x00\x78\x9c\xbd\x58\x4b\x88\x23\x45\x18\xee\x3c\x67\xe3\x24\x93\xcc\xae\x8a\xf8\x42\x10\xc4\xcb\x1a\x33\xa3\x7b\xf0\x60\xe6\xe0\xe6\xe0\x49\x90\xbd\x29\x4d\x4d\x77\x25\xdd\x99\xee\xea\xde\xaa\xee\x4c\x32\x82\x2c\xe8\xc1\x93\xac\x47\xc5\x45\xf\x82\x8\x5e\x16\xf\xba\x78\x18\x45\xd0\x83\x7\x95\x15\x5c\xd0\xc3\xaa\xb0\xb2\xee\x65\x5c\xf0\xe4\xc5\xbf\xaa\x1f\xa9\xea\x74\xcf\x64\x7\x31\xc3\x24\x9d\xfa\xfe\xbf\xea\xab\xff\x59\x15\xab\x62\x6a\xb5\x5d\x9b\x8c\x18\x2a\x5b\x15\x47\xd3\xb4\x92\x55\x35\xb5\xba\xb7\x3d\xc6\x46\xb0\xa3\x35\x3\x1c\x50\x64\x61\x93\x7a\xa4\x67\xd5\x0\xe1\xc2\xd8\xe4\x92\x75\xfe\x56\xb3\xca\xa6\x76\xc2\xf0\x1c\x8f" | gzip -dc | hexdump -C
produces:
gzip: stdin: unexpected end of file
00000000 68 03 64 00 05 77 69 6e 67 73 61 02 68 03 6c 00 |h.d..wingsa.h.l.|
00000010 00 00 01 68 04 64 00 06 6f 62 6a 65 63 74 6b 00 |...h.d..objectk.|
00000020 0c 74 65 74 72 61 68 65 64 72 6f 6e 31 68 05 64 |.tetrahedron1h.d|
00000030 00 06 77 69 6e 67 65 64 6c 00 00 00 06 6c 00 00 |..wingedl....l..|
00000040 00 05 68 02 64 00 08 63 6f 6c 6f |..h.d..colo|
0000004b
which is what I want.
Upvotes: 0
Views: 630
Reputation: 4924
I was able to decode the data you provided by using zlib 1.2.8 and the inflateInit2
function with 32 for windowBits. I used 32 based on this information from the zlib documentation:
windowBits can also be zero to request that inflate use the window size in the zlib header of the compressed stream.
and
Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection
Here's the full code. I stripped out error checking since I don't have a zerr
function. It doesn't appear you're using Visual C++, so you will want to remove the #pragma
to avoid a warning as well.
#include <iostream>
#include <iomanip>
#include <cstdint>
#include <cctype>
#include "zlib.h"
#pragma comment(lib, "zdll.lib")
// Number of bytes shown on each dump line.
const size_t block_size = 16;

/// Writes one hexdump-style line to std::cout.
///
/// The line consists of the offset as 8 zero-padded hex digits, block_size byte
/// columns as 2-digit hex values (split into two groups), and a character
/// column between '|' markers in which alphanumeric bytes are shown as-is and
/// everything else as '.'.
///
/// @param data   first byte of the line
/// @param offset value printed in the offset column
/// @param n      number of valid bytes at data; columns at index >= n are
///               printed as "00" / '.'. Nothing is printed when n is 0.
void printLine(unsigned char* data, size_t offset, size_t n)
{
    if(n == 0)
    {
        return;
    }

    // std::hex, std::right and the fill character are sticky. Save them so the
    // caller's stream formatting is not silently changed by this function.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    const char savedFill = std::cout.fill();

    // Select hex before the offset is written so the offset column uses the
    // same base on every call, not only on calls after the first one.
    std::cout << std::hex << std::right << std::setfill('0');
    std::cout << std::setw(8) << offset << " ";
    for(size_t x = 0; x < block_size; ++x)
    {
        // Extra space in front of each half of the line.
        if(x % (block_size/2) == 0) std::cout << " ";
        // Widen to an integer type so the byte is printed as a number.
        const unsigned int value = x < n ? data[x] : 0;
        std::cout << std::setw(2) << value << " ";
    }
    std::cout << "|";
    for(size_t x = 0; x < block_size; ++x)
    {
        const bool shown = x < n && std::isalnum(data[x]);
        std::cout << (shown ? static_cast<char>(data[x]) : '.');
    }
    std::cout << "|\n";

    std::cout.flags(savedFlags);
    std::cout.fill(savedFill);
}
void printHex(unsigned char* data, size_t n)
{
const size_t blocks = n / block_size;
const size_t remainder = n % block_size;
for(size_t i = 0; i < blocks; i++)
{
size_t offset = i * block_size;
printLine(&data[offset], offset, block_size);
}
size_t offset = blocks * block_size;
printLine(&data[offset], offset, remainder);
std::cout << "\n";
}
int inflateZlib(unsigned char* data, uint32_t length, unsigned char* decompressed, uint32_t maxDecompressed)
{
z_stream infstream;
infstream.zalloc = Z_NULL;
infstream.zfree = Z_NULL;
infstream.opaque = Z_NULL;
infstream.avail_in = length;
infstream.next_in = data;
infstream.avail_out = maxDecompressed;
infstream.next_out = decompressed;
inflateInit2(&infstream, 32);
inflate(&infstream, Z_FINISH);
inflateEnd(&infstream);
return infstream.total_out;
}
// Demo driver: hex-dumps a hard-coded compressed chunk, inflates it, and
// hex-dumps the bytes that were produced.
int main()
{
    // Sample data: an 8-byte gzip header followed by the bytes from the question.
    unsigned char dataChunk[] =
        "\x1f\x8b\x08\x00\x00\x00\x00\x00\x78\x9c\xbd\x58\x4b\x88\x23\x45"
        "\x18\xee\x3c\x67\xe3\x24\x93\xcc\xae\x8a\xf8\x42\x10\xc4\xcb\x1a"
        "\x33\xa3\x7b\xf0\x60\xe6\xe0\xe6\xe0\x49\x90\xbd\x29\x4d\x4d\x77"
        "\x25\xdd\x99\xee\xea\xde\xaa\xee\x4c\x32\x82\x2c\xe8\xc1\x93\xac"
        "\x47\xc5\x45\xf\x82\x8\x5e\x16\xf\xba\x78\x18\x45\xd0\x83\x7\x95"
        "\x15\x5c\xd0\xc3\xaa\xb0\xb2\xee\x65\x5c\xf0\xe4\xc5\xbf\xaa\x1f"
        "\xa9\xea\x74\xcf\x64\x07\x31\xc3\x24\x9d\xfa\xfe\xbf\xea\xab\xff"
        "\x59\x15\xab\x62\x6a\xb5\x5d\x9b\x8c\x18\x2a\x5b\x15\x47\xd3\xb4"
        "\x92\x55\x35\xb5\xba\xb7\x3d\xc6\x46\xb0\xa3\x35\x03\x1c\x50\x64"
        "\x61\x93\x7a\xa4\x67\xd5\x00\xe1\xc2\xd8\xe4\x92\x75\xfe\x56\xb3"
        "\xca\xa6\x76\xc2\xf0\x1c\x8f";

    // The string literal appends a terminating NUL that is not part of the
    // compressed data, so it must not be dumped or handed to inflate.
    const uint32_t dataLength = sizeof(dataChunk) - 1;

    unsigned char decompressed[1000] = {};
    printHex(dataChunk, dataLength);
    const uint32_t len = inflateZlib(dataChunk, dataLength, decompressed, sizeof(decompressed));
    printHex(decompressed, len);
    return 0;
}
Upvotes: 1
Reputation: 9416
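I think you might want to define decompressed differently. As posted, it is an uninitialized pointer that inflateZlib receives by value, so the buffer allocated inside the function never reaches the caller. Give the caller a real buffer instead: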
// Caller-owned output buffer. Zero-initialised so that bytes inflate never
// writes read as 0 when a fixed-length dump is printed afterwards.
unsigned char decompressed[1000] = {};
Upvotes: 0