jhoefnagels
jhoefnagels

Reputation: 379

C# equivalent of LZMA-JS compress

I have used some client-side JavaScript code to compress some user input using this JavaScript library (LZMA-JS). In the backend code I have used the code samples from this post to decompress the data server-side using C#.

This works perfectly.

Now I would like to be able to compress a string the same way the JavaScript does. When I compress a string using this sample, I get an array of signed integers ranging from -128 to 127. I would like my backend code to produce the same output.

The LZMA properties used by the JavaScript are a bit different from the default properties of the C# code, but even if I change those to the same values I get different results from the two libraries.

First, the output values from the C# code are unsigned. Second, the number of bytes returned is different. The differences may be introduced by the properties of the encoders, but I have no idea how to get the two libraries aligned.

My C# code uses the LZMA SDK from 7-Zip.

My C# code to decompress the javascript compressed data (comma separated array of signed integers):

/// <summary>
/// Decodes an LZMA stream laid out as 5 bytes of decoder properties, followed by
/// an 8-byte little-endian uncompressed size, followed by the compressed payload.
/// </summary>
/// <param name="inStream">
/// Source stream positioned at the first property byte. It must support
/// <c>Length</c> and <c>Position</c>, which are used to size the payload.
/// </param>
/// <param name="outStream">Stream that receives the decompressed bytes.</param>
/// <exception cref="EndOfStreamException">
/// The input ends before the 13-byte header has been read completely.
/// </exception>
public static void Decompress(Stream inStream, Stream outStream)
{
    // Stream.Read is allowed to return fewer bytes than requested, so keep
    // reading until all five property bytes have arrived.
    byte[] properties = new byte[5];
    int filled = 0;
    while (filled < properties.Length)
    {
        int read = inStream.Read(properties, filled, properties.Length - filled);
        if (read <= 0)
        {
            throw new EndOfStreamException("LZMA input is truncated: incomplete properties header.");
        }
        filled += read;
    }

    SevenZip.Compression.LZMA.Decoder decoder = new SevenZip.Compression.LZMA.Decoder();
    decoder.SetDecoderProperties(properties);

    // Assemble the size from its least significant byte upwards.
    long outSize = 0;
    for (int i = 0; i < 8; i++)
    {
        int v = inStream.ReadByte();
        if (v < 0)
        {
            // ReadByte signals end of stream with -1; without this check it
            // would be folded into the size as 0xFF.
            throw new EndOfStreamException("LZMA input is truncated: incomplete size header.");
        }
        outSize |= ((long)(byte)v) << (8 * i);
    }

    // Everything after the header is treated as compressed payload.
    long compressedSize = inStream.Length - inStream.Position;
    decoder.Code(inStream, outStream, compressedSize, outSize, null);
}

/// <summary>
/// Decompresses LZMA data supplied as a comma-separated list of integer byte
/// values and returns the decoded text.
/// </summary>
/// <param name="inputstring">
/// Comma-separated integers, one per compressed byte. Each value is narrowed to
/// a byte, so signed values (for example -1) and unsigned values (255) are
/// both accepted.
/// </param>
/// <returns>
/// The decompressed content read as text with <see cref="StreamReader"/>'s
/// default encoding, or an empty string when the input is null or empty.
/// </returns>
public static string DecompressLzma(string inputstring)
{
    if (string.IsNullOrEmpty(inputstring))
    {
        return "";
    }

    // The payload is machine-generated, so parse with the invariant culture
    // rather than whatever culture the current thread happens to use.
    // unchecked: negative values must wrap to 128-255 even in a checked build.
    byte[] compressed = Array.ConvertAll(
        inputstring.Split(','),
        s => unchecked((byte)int.Parse(s, System.Globalization.CultureInfo.InvariantCulture)));

    using (var stream = new MemoryStream(compressed))
    using (var outputStream = new MemoryStream())
    {
        Decompress(stream, outputStream);

        // Rewind so the reader sees the decoded bytes from the start.
        outputStream.Position = 0;
        using (var reader = new StreamReader(outputStream))
        {
            return reader.ReadToEnd();
        }
    }
}

The code to compress the data looks like this (the number of bytes is different and the values are unsigned):

/// <summary>
/// Compresses text with LZMA and returns the compressed bytes as a
/// comma-separated list of unsigned values (0-255).
/// </summary>
/// <param name="inputstring">Text to compress.</param>
/// <returns>
/// The compressed bytes joined with commas, or an empty string when the input
/// is null or empty.
/// </returns>
public static string CompressLzma(string inputstring)
{
    if (string.IsNullOrEmpty(inputstring))
    {
        return "";
    }

    // UTF-8 matches how DecompressLzma reads the decoded bytes back
    // (StreamReader's default encoding), so the two methods round-trip.
    using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(inputstring)))
    using (var outputStream = new MemoryStream())
    {
        Compress(stream, outputStream);

        // Return the result instead of discarding it. Values stay unsigned;
        // DecompressLzma narrows each parsed integer to a byte, so it accepts them.
        byte[] bytes = outputStream.ToArray();
        string[] values = Array.ConvertAll(
            bytes,
            b => b.ToString(System.Globalization.CultureInfo.InvariantCulture));
        return string.Join(",", values);
    }
}

/// <summary>
/// Writes an LZMA stream to <paramref name="outStream"/>: the encoder
/// properties, an 8-byte little-endian uncompressed size, then the encoded data.
/// </summary>
/// <param name="inStream">Data to compress, consumed from its current position.</param>
/// <param name="outStream">Destination for the header and compressed payload.</param>
public static void Compress(MemoryStream inStream, MemoryStream outStream)
{
    CoderPropID[] propIDs;
    object[] properties;
    PrepareEncoder(out propIDs, out properties);

    SevenZip.Compression.LZMA.Encoder encoder = new SevenZip.Compression.LZMA.Encoder();
    encoder.SetCoderProperties(propIDs, properties);
    encoder.WriteCoderProperties(outStream);

    // The encoder is handed the stream at its current position, so the header
    // must report the bytes remaining rather than the total length. The two
    // are identical when the stream is at position 0.
    Int64 fileSize = inStream.Length - inStream.Position;
    for (int i = 0; i < 8; i++)
    {
        // Least significant byte first; unchecked keeps the truncating cast
        // valid in a checked build.
        outStream.WriteByte(unchecked((Byte)(fileSize >> (8 * i))));
    }

    encoder.Code(inStream, outStream, -1, -1, null);
}

/// <summary>
/// Produces the LZMA encoder settings as two parallel arrays: the value at
/// index <c>i</c> of <paramref name="properties"/> belongs to the ID at index
/// <c>i</c> of <paramref name="propIDs"/>.
/// </summary>
/// <param name="propIDs">Receives the eight property identifiers.</param>
/// <param name="properties">Receives the eight matching property values.</param>
public static void PrepareEncoder(out CoderPropID[] propIDs, out object[] properties)
{
    propIDs = new CoderPropID[8];
    properties = new object[8];

    propIDs[0] = CoderPropID.DictionarySize;
    properties[0] = (Int32)(1 << 16);

    propIDs[1] = CoderPropID.PosStateBits;
    properties[1] = (Int32)2;

    // 3 for normal files; 0 was noted as the alternative for 32-bit data.
    propIDs[2] = CoderPropID.LitContextBits;
    properties[2] = (Int32)3;

    // 0 here; 2 was noted as the alternative for 32-bit data.
    propIDs[3] = CoderPropID.LitPosBits;
    properties[3] = (Int32)0;

    propIDs[4] = CoderPropID.Algorithm;
    properties[4] = (Int32)2;

    propIDs[5] = CoderPropID.NumFastBytes;
    properties[5] = (Int32)32;

    propIDs[6] = CoderPropID.MatchFinder;
    properties[6] = "bt2";

    propIDs[7] = CoderPropID.EndMarker;
    properties[7] = true;
}

Upvotes: 3

Views: 984

Answers (1)

jhoefnagels
jhoefnagels

Reputation: 379

This code produces the same string the JavaScript code would; the LZMA settings are included:

/// <summary>
/// Compresses text with LZMA and returns the compressed bytes as a
/// comma-separated list of signed values (-128 to 127).
/// </summary>
/// <param name="inputstring">Text to compress; it is encoded as UTF-8 first.</param>
/// <returns>
/// The compressed bytes joined with commas, or an empty string when the input
/// is null or empty.
/// </returns>
public static string CompressLzma(string inputstring)
{
    if (string.IsNullOrEmpty(inputstring))
    {
        return "";
    }

    using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(inputstring)))
    using (var outputStream = new MemoryStream())
    {
        Compress(stream, outputStream);

        // Format with the invariant culture so the minus sign of negative
        // values does not depend on the current thread's culture.
        byte[] bytes = outputStream.ToArray();
        string[] values = Array.ConvertAll(
            bytes,
            v => signedInt((int)v).ToString(System.Globalization.CultureInfo.InvariantCulture));
        return string.Join(",", values);
    }
}


/// <summary>
/// Supplies the LZMA encoder settings as two index-aligned arrays of property
/// identifiers and their values.
/// </summary>
/// <param name="propIDs">Receives the property identifiers.</param>
/// <param name="properties">Receives the value for each identifier, in the same order.</param>
public static void PrepareEncoder(out CoderPropID[] propIDs, out object[] properties)
{
    const Int32 dictionarySize = 1 << 16;
    const Int32 positionStateBits = 2;
    const Int32 literalContextBits = 3; // for normal files (0 for 32-bit data)
    const Int32 literalPositionBits = 0; // 2 for 32-bit data
    const Int32 algorithmMode = 2;
    const Int32 fastBytes = 64;
    const string matchFinder = "bt4";
    const bool writeEndMarker = true;

    // Keep both initializers in the same order: entries are paired by index.
    propIDs = new CoderPropID[]
    {
        CoderPropID.DictionarySize,
        CoderPropID.PosStateBits,
        CoderPropID.LitContextBits,
        CoderPropID.LitPosBits,
        CoderPropID.Algorithm,
        CoderPropID.NumFastBytes,
        CoderPropID.MatchFinder,
        CoderPropID.EndMarker
    };
    properties = new object[]
    {
        dictionarySize,
        positionStateBits,
        literalContextBits,
        literalPositionBits,
        algorithmMode,
        fastBytes,
        matchFinder,
        writeEndMarker
    };
}

/// <summary>
/// Maps an unsigned byte value onto its signed 8-bit equivalent, so that
/// 128-255 become -128 to -1.
/// </summary>
/// <param name="unsignedInt">Value to convert; expected to be a byte value (0-255).</param>
/// <returns>
/// The input unchanged when it is below 128; otherwise the input minus 256.
/// </returns>
private static int signedInt(int unsignedInt)
{
    if (unsignedInt < 128)
    {
        return unsignedInt;
    }

    // Two's-complement wrap for the upper half of the byte range.
    return unsignedInt - 256;
}


/// <summary>
/// Compresses <paramref name="inStream"/> into <paramref name="outStream"/> as
/// an LZMA stream: encoder properties first, then the uncompressed size as
/// 8 little-endian bytes, then the encoded data.
/// </summary>
/// <param name="inStream">Data to compress, read from its current position.</param>
/// <param name="outStream">Receives the header followed by the compressed payload.</param>
public static void Compress(MemoryStream inStream, MemoryStream outStream)
{
    CoderPropID[] propIDs;
    object[] properties;
    PrepareEncoder(out propIDs, out properties);

    SevenZip.Compression.LZMA.Encoder encoder = new SevenZip.Compression.LZMA.Encoder();
    encoder.SetCoderProperties(propIDs, properties);
    encoder.WriteCoderProperties(outStream);

    // Report the bytes that remain from the current position, not the total
    // length: that is the data the encoder is given. Same value at position 0.
    Int64 fileSize = inStream.Length - inStream.Position;
    for (int shift = 0; shift < 64; shift += 8)
    {
        // Emit the size least significant byte first; unchecked keeps the
        // truncating cast valid in a checked build.
        outStream.WriteByte(unchecked((Byte)(fileSize >> shift)));
    }

    encoder.Code(inStream, outStream, -1, -1, null);
}

Upvotes: 1

Related Questions