benjamin54
benjamin54

Reputation: 1300

Gzip compression and decompression in C#

I'm trying to compress a string in one module and decompress it in another module. Here is the code I'm using.

Compress

/// <summary>
/// GZip-compresses <paramref name="text"/> and returns the result as a Base64 string.
/// </summary>
/// <param name="text">The text to compress. It is encoded with <c>Encoding.ASCII</c>, so
/// characters outside the ASCII range are not preserved.</param>
/// <returns>
/// Base64 text whose decoded bytes are a 4-byte length prefix (the uncompressed byte count,
/// as produced by <c>BitConverter.GetBytes</c>) followed by the GZip stream.
/// </returns>
/// <remarks>
/// The 4-byte prefix is not part of the GZip format: a consumer has to Base64-decode the
/// string and skip the first 4 bytes before handing the remainder to a <c>GZipStream</c>.
/// </remarks>
public static string CompressString(string text)
{
    byte[] buffer = Encoding.ASCII.GetBytes(text);

    using (MemoryStream ms = new MemoryStream())
    {
        // leaveOpen = true keeps ms usable; disposing the GZipStream is what flushes
        // the final compressed block and the GZip trailer into ms.
        using (GZipStream zip = new GZipStream(ms, CompressionMode.Compress, true))
        {
            zip.Write(buffer, 0, buffer.Length);
        }

        // ToArray copies everything written so far, independent of the stream position.
        byte[] compressed = ms.ToArray();

        byte[] gzBuffer = new byte[compressed.Length + 4];
        System.Buffer.BlockCopy(BitConverter.GetBytes(buffer.Length), 0, gzBuffer, 0, 4);
        System.Buffer.BlockCopy(compressed, 0, gzBuffer, 4, compressed.Length);
        return Convert.ToBase64String(gzBuffer);
    }
}

Decompress

/// <summary>
/// Inflates a GZip byte array and returns the decompressed bytes.
/// </summary>
/// <param name="data">Bytes that start with a GZip header; no extra prefix is skipped.</param>
/// <returns>The decompressed bytes.</returns>
public static byte[] DecompressString(byte[] data)
{
    using (var inflated = new MemoryStream())
    {
        using (var raw = new MemoryStream(data))
        {
            using (var gzip = new GZipStream(raw, CompressionMode.Decompress))
            {
                gzip.CopyTo(inflated);
            }
        }

        return inflated.ToArray();
    }
}

Using it as:

 // NOTE(review): presumably `ip` holds the Base64 string returned by CompressString — confirm.
 // If so, these are the ASCII bytes of Base64 text rather than GZip data; CompressString's output
 // must be Base64-decoded and its 4-byte length prefix skipped before GZipStream can read it.
 DecompressString(System.Text.Encoding.ASCII.GetBytes(ip));

But for the above statement, I'm getting the following error.

{"The magic number in GZip header is not correct. Make sure you are passing in a GZip stream."} System.SystemException {System.IO.InvalidDataException}

Upvotes: 19

Views: 43201

Answers (3)

MattE303
MattE303

Reputation: 11

I needed a solution that produced compressed strings which could be decoded by standard utilities such as https://bugdays.com/gzip-base64. The previous solutions don't (presumably due to the non-standard, prepended length value that Lasse mentioned).

I looked at the Microsoft docs for GZipStream, and based on that example code, and the previous solutions presented here by Lasse V. Karlsen and DaGrisa, I came up with the following solution which works nicely (tested in LINQPad):

The output

original: 256
This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. 

compressed: 52
H4sIAAAAAAAACgvJyCxWAKJEhZLU4hI9hZARxgcA1T13lgABAAA=

decompressed: 256
This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test.

The program

// Demo driver: builds a 256-character sample, round-trips it through
// Compress/Decompress and dumps each stage with its length.
void Main()
{
    // Doubling the 32-character seed three times yields 256 characters.
    string input = "This is a test. This is a test. ";
    for (int pass = 0; pass < 3; pass++)
    {
        input += input;
    }

    string compressed = Compress(input);
    string decompressed = Decompress(compressed);

    // The argument passed to Dump is the heading shown for each value.
    input.Dump("original: " + input.Length);
    compressed.Dump("compressed: " + compressed.Length);
    decompressed.Dump("decompressed: " + decompressed.Length);
}

/// <summary>
/// Decodes a Base64 string, decompresses the bytes and returns them as UTF-8 text.
/// </summary>
/// <param name="input">Base64 text as produced by <c>Compress(string)</c>.</param>
/// <returns>The decompressed text.</returns>
public static string Decompress(string input)
{
    // Base64 -> compressed bytes -> raw bytes -> UTF-8 string.
    return Encoding.UTF8.GetString(Decompress(Convert.FromBase64String(input)));
}

/// <summary>
/// Encodes the text as UTF-8, compresses the bytes and returns them as a Base64 string.
/// </summary>
/// <param name="input">The text to compress.</param>
/// <returns>Base64 text of the compressed bytes.</returns>
public static string Compress(string input)
{
    // UTF-8 bytes -> compressed bytes -> Base64 string.
    return Convert.ToBase64String(Compress(Encoding.UTF8.GetBytes(input)));
}

/// <summary>
/// Inflates a plain GZip byte array (no custom prefix) into the original bytes.
/// </summary>
/// <param name="input">GZip-compressed bytes.</param>
/// <returns>The decompressed bytes.</returns>
public static byte[] Decompress(byte[] input)
{
    using (var gzipBytes = new MemoryStream(input))
    using (var inflater = new GZipStream(gzipBytes, CompressionMode.Decompress))
    using (var plain = new MemoryStream())
    {
        inflater.CopyTo(plain);
        return plain.ToArray();
    }
}

/// <summary>
/// Compresses a byte array into a plain GZip stream (no custom prefix).
/// </summary>
/// <param name="input">The bytes to compress.</param>
/// <returns>The GZip-compressed bytes.</returns>
public static byte[] Compress(byte[] input)
{
    using (var gzipBytes = new MemoryStream())
    {
        using (var plain = new MemoryStream(input))
        using (var deflater = new GZipStream(gzipBytes, CompressionMode.Compress))
        {
            plain.CopyTo(deflater);
        }

        // Read the buffer only after the GZipStream has been disposed, so the
        // final block and trailer are included.
        return gzipBytes.ToArray();
    }
}

Upvotes: 1

DaGrisa
DaGrisa

Reputation: 31

.NET 6+ solution (with fix of https://github.com/dotnet/runtime/issues/64577):

// Demo driver: round-trips a 256-character sample through Compress/Decompress
// and dumps the original, compressed and decompressed strings with their lengths.
void Main()
{
    const int doublings = 3;

    // 32-character seed, doubled three times -> 256 characters.
    string input = "This is a test. This is a test. ";
    for (int i = 0; i < doublings; i++)
    {
        input += input;
    }

    string compressed = Compress(input);
    string decompressed = Decompress(compressed);

    input.Dump("original: " + input.Length);
    compressed.Dump("compressed: " + compressed.Length);
    decompressed.Dump("decompressed: " + decompressed.Length);
}

/// <summary>
/// Base64-decodes <paramref name="input"/>, decompresses it and decodes the result as UTF-8.
/// </summary>
/// <param name="input">Base64 text as produced by <c>Compress(string)</c>.</param>
/// <returns>The decompressed text.</returns>
public static string Decompress(string input)
{
    byte[] rawBytes = Decompress(Convert.FromBase64String(input));
    return Encoding.UTF8.GetString(rawBytes);
}

/// <summary>
/// Encodes <paramref name="input"/> as UTF-8, compresses it and returns Base64 text.
/// </summary>
/// <param name="input">The text to compress.</param>
/// <returns>Base64 text of the compressed bytes.</returns>
public static string Compress(string input)
{
    byte[] packedBytes = Compress(Encoding.UTF8.GetBytes(input));
    return Convert.ToBase64String(packedBytes);
}

/// <summary>
/// Decompresses bytes laid out as a 4-byte length prefix followed by a GZip stream.
/// </summary>
/// <param name="input">The length-prefixed compressed bytes.</param>
/// <returns>An array of the size given by the prefix, filled with the decompressed bytes.</returns>
public static byte[] Decompress(byte[] input)
{
    using (var packed = new MemoryStream(input))
    {
        // The first 4 bytes carry the uncompressed length.
        var header = new byte[4];
        packed.Read(header, 0, 4);
        int expected = BitConverter.ToInt32(header, 0);

        using (var gzip = new GZipStream(packed, CompressionMode.Decompress))
        {
            var output = new byte[expected];
            int filled = 0;

            // A single Read may deliver fewer bytes than requested, so keep
            // reading until the stream reports nothing more.
            while (true)
            {
                int got = gzip.Read(output, filled, expected - filled);
                if (got <= 0)
                {
                    break;
                }

                filled += got;
            }

            return output;
        }
    }
}

/// <summary>
/// Compresses a byte array into a 4-byte length prefix followed by a GZip stream.
/// </summary>
/// <param name="input">The bytes to compress.</param>
/// <returns>The length prefix followed by the GZip-compressed bytes.</returns>
public static byte[] Compress(byte[] input)
{
    using (var output = new MemoryStream())
    {
        // Prefix: the uncompressed length as a 4-byte integer.
        byte[] prefix = BitConverter.GetBytes(input.Length);
        output.Write(prefix, 0, 4);

        using (var gzip = new GZipStream(output, CompressionMode.Compress))
        {
            gzip.Write(input, 0, input.Length);
            gzip.Flush();
        }

        // Read the buffer only after the GZipStream has been disposed.
        return output.ToArray();
    }
}

Upvotes: 3

Lasse V. Karlsen
Lasse V. Karlsen

Reputation: 391594

Here is a rewrite of your code that should work the way you want it to.

I wrote it in LINQPad and it can be tested in that.

Note that there's very little error checking here. You should add checks to verify that every read operation completes and has actually read as many bytes as it was supposed to, along with similar checks.

The output

original: 256
This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test.  

compressed: 56
AAEAAB+LCAAAAAAABAALycgsVgCiRIWS1OISPYWQEcYHANU9d5YAAQAA 

decompressed: 256
This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test.  

The program

// Demo driver: compresses and decompresses a 256-character sample string and
// dumps the original, compressed and decompressed values with their lengths.
void Main()
{
    // The 32-character seed is doubled three times, giving 256 characters.
    string input = "This is a test. This is a test. ";
    int remaining = 3;
    while (remaining-- > 0)
    {
        input += input;
    }

    string compressed = Compress(input);
    string decompressed = Decompress(compressed);

    input.Dump("original: " + input.Length);
    compressed.Dump("compressed: " + compressed.Length);
    decompressed.Dump("decompressed: " + decompressed.Length);
}

/// <summary>
/// Turns a Base64 string of compressed bytes back into the original UTF-8 text.
/// </summary>
/// <param name="input">Base64 text as produced by <c>Compress(string)</c>.</param>
/// <returns>The decompressed text.</returns>
public static string Decompress(string input)
{
    byte[] packed = Convert.FromBase64String(input);
    return Encoding.UTF8.GetString(Decompress(packed));
}

/// <summary>
/// Compresses the UTF-8 bytes of <paramref name="input"/> and returns them as Base64 text.
/// </summary>
/// <param name="input">The text to compress.</param>
/// <returns>Base64 text of the compressed bytes.</returns>
public static string Compress(string input)
{
    byte[] utf8 = Encoding.UTF8.GetBytes(input);
    return Convert.ToBase64String(Compress(utf8));
}

/// <summary>
/// Decompresses bytes laid out as a 4-byte length prefix followed by a GZip stream.
/// </summary>
/// <param name="input">The length-prefixed compressed bytes.</param>
/// <returns>The decompressed bytes; the array length equals the prefix value.</returns>
/// <exception cref="InvalidDataException">
/// The input is shorter than the 4-byte prefix, or the GZip stream ends before the
/// number of bytes announced by the prefix has been produced.
/// </exception>
public static byte[] Decompress(byte[] input)
{
    using (var source = new MemoryStream(input))
    {
        byte[] lengthBytes = new byte[4];
        if (source.Read(lengthBytes, 0, 4) != 4)
        {
            throw new InvalidDataException("Input is too short to contain the 4-byte length prefix.");
        }

        var length = BitConverter.ToInt32(lengthBytes, 0);
        using (var decompressionStream = new GZipStream(source,
            CompressionMode.Decompress))
        {
            var result = new byte[length];
            int totalRead = 0;

            // Stream.Read may return fewer bytes than requested (GZipStream does so
            // on .NET 6+), so a single call is not enough to fill the buffer.
            while (totalRead < length)
            {
                int bytesRead = decompressionStream.Read(result, totalRead, length - totalRead);
                if (bytesRead == 0)
                {
                    throw new InvalidDataException("Compressed data ended before the expected length was read.");
                }

                totalRead += bytesRead;
            }

            return result;
        }
    }
}

/// <summary>
/// Writes a 4-byte length prefix followed by the GZip-compressed form of <paramref name="input"/>.
/// </summary>
/// <param name="input">The bytes to compress.</param>
/// <returns>The length prefix followed by the GZip-compressed bytes.</returns>
public static byte[] Compress(byte[] input)
{
    using (var buffer = new MemoryStream())
    {
        // The uncompressed length goes first, as a 4-byte integer.
        buffer.Write(BitConverter.GetBytes(input.Length), 0, 4);

        var gzip = new GZipStream(buffer, CompressionMode.Compress);
        try
        {
            gzip.Write(input, 0, input.Length);
            gzip.Flush();
        }
        finally
        {
            // Disposing the GZipStream completes the compressed data in the buffer.
            gzip.Dispose();
        }

        return buffer.ToArray();
    }
}

Upvotes: 28

Related Questions