Greg Graham
Greg Graham

Reputation: 474

Avro (Microsoft) Serialization of derived type members missing

I am evaluating the performance of Microsoft's implementation of Avro, and at first I thought I was getting phenomenal performance until I realized it just wasn't serializing the entire message ;-)

In the following there is a simple hierarchy of messages decorated with [DataContract] (a base and two derived types). All members are decorated with the [DataMember] attribute. I create a serializer from the base message type and serialize a list of derived messages, but it appears to only serialize/deserialize the base class members. All of the derived message members are missing from the result.

Am I missing something? My application will require mixed message types.

FWIW I don't see any strings from the second derived type in the binary file, so I suspect the derived type members aren't being serialized.

Thanks, Greg


class Program
{
    /// <summary>
    /// Event kind stored in the <c>Event</c> field of <c>SubMessage1</c>.
    /// </summary>
    /// <remarks>
    /// The data contract name ("SideType") differs from the CLR type name (<c>EventType</c>).
    /// </remarks>
    [DataContract(Name = "SideType", Namespace = "AvroMessage")]
    public enum EventType
    {
        // Zero value; this is what an unassigned EventType field holds.
        Unknown = 0,
        One = 1,
        Two = 2
    }

    /// <summary>
    /// Base type of the message hierarchy; holds the fields common to every message.
    /// </summary>
    /// <remarks>
    /// No <c>[KnownType]</c> attributes are declared here for <c>SubMessage1</c> or
    /// <c>SubMessage2</c>, so a serializer built from this type alone is given no
    /// declaration of the derived contracts.
    /// </remarks>
    [DataContract(Name = "MessageBase", Namespace = "AvroMessage")]
    public class MessageBase
    {
        [DataMember(Name = "Subtype")]
        public string Subtype;

        [DataMember(Name = "Timestamp")]
        public DateTime Timestamp;

        [DataMember(Name = "GroupName")]
        public string GroupName;

        /// <summary>
        /// Field-wise equality over <c>Subtype</c>, <c>Timestamp</c> and <c>GroupName</c>.
        /// </summary>
        /// <param name="obj">Object to compare with; anything that is not a <c>MessageBase</c> is unequal.</param>
        /// <returns><c>true</c> when all three base fields match.</returns>
        public override bool Equals(object obj)
        {
            MessageBase other = obj as MessageBase;
            if (other == null)
            {
                return false;
            }

            return Subtype == other.Subtype &&
                   Timestamp == other.Timestamp &&
                   GroupName == other.GroupName;
        }

        /// <summary>
        /// Hash code built from the same fields <see cref="Equals(object)"/> compares, so that
        /// instances which compare equal also hash equal.
        /// </summary>
        /// <returns>A hash of <c>Subtype</c>, <c>Timestamp</c> and <c>GroupName</c>.</returns>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash values.
            unchecked
            {
                int hash = 17;
                hash = hash * 31 + (Subtype != null ? Subtype.GetHashCode() : 0);
                hash = hash * 31 + Timestamp.GetHashCode();
                hash = hash * 31 + (GroupName != null ? GroupName.GetHashCode() : 0);
                return hash;
            }
        }
    }

    /// <summary>
    /// First derived message type: adds <c>Volume</c>, <c>Count</c> and <c>Event</c>
    /// to the base message fields.
    /// </summary>
    [DataContract(Name = "SubMessage1", Namespace = "AvroMessage")]
    public class SubMessage1 : MessageBase
    {
        [DataMember(Name = "Volume")]
        public int Volume;

        [DataMember(Name = "Count")]
        public int Count;

        // Serialized under the contract member name "DetectedSide", not "Event".
        [DataMember(Name = "DetectedSide")]
        public EventType Event;

        /// <summary>
        /// Field-wise equality over the inherited fields and this type's own fields.
        /// </summary>
        /// <param name="obj">Object to compare with; anything that is not a <c>SubMessage1</c> is unequal.</param>
        /// <returns><c>true</c> when every compared field matches.</returns>
        public override bool Equals(object obj)
        {
            SubMessage1 other = obj as SubMessage1;
            if (other == null)
            {
                return false;
            }

            return Subtype == other.Subtype &&
                   Timestamp == other.Timestamp &&
                   GroupName == other.GroupName &&
                   Event == other.Event &&
                   Volume == other.Volume &&
                   Count == other.Count;
        }

        /// <summary>
        /// Hash code built from the same fields <see cref="Equals(object)"/> compares, so that
        /// instances which compare equal also hash equal.
        /// </summary>
        /// <returns>A hash of the inherited and declared fields.</returns>
        public override int GetHashCode()
        {
            // Inherited fields are hashed here directly rather than through base.GetHashCode(),
            // so the result does not depend on how the base class implements hashing.
            unchecked
            {
                int hash = 17;
                hash = hash * 31 + (Subtype != null ? Subtype.GetHashCode() : 0);
                hash = hash * 31 + Timestamp.GetHashCode();
                hash = hash * 31 + (GroupName != null ? GroupName.GetHashCode() : 0);
                hash = hash * 31 + (int)Event;
                hash = hash * 31 + Volume;
                hash = hash * 31 + Count;
                return hash;
            }
        }
    }

    /// <summary>
    /// Second derived message type: adds two name/volume pairs and a price move
    /// to the base message fields.
    /// </summary>
    [DataContract(Name = "SubMessage2", Namespace = "AvroMessage")]
    public class SubMessage2 : MessageBase
    {
        [DataMember(Name = "Name1")]
        public string Name1;

        [DataMember(Name = "Volume1")]
        public int Volume1;

        [DataMember(Name = "Name2")]
        public string Name2;

        [DataMember(Name = "Volume2")]
        public int Volume2;

        [DataMember(Name = "PriceMove")]
        public double PriceMove;

        /// <summary>
        /// Field-wise equality over the inherited fields and this type's own fields.
        /// </summary>
        /// <param name="obj">Object to compare with; anything that is not a <c>SubMessage2</c> is unequal.</param>
        /// <returns><c>true</c> when every compared field matches.</returns>
        public override bool Equals(object obj)
        {
            SubMessage2 other = obj as SubMessage2;
            if (other == null)
            {
                return false;
            }

            // PriceMove is compared exactly on purpose: the comparison is used to verify
            // that a value survives a serialization round trip unchanged.
            return Subtype == other.Subtype &&
                   Timestamp == other.Timestamp &&
                   GroupName == other.GroupName &&
                   Volume1 == other.Volume1 &&
                   Name1 == other.Name1 &&
                   Volume2 == other.Volume2 &&
                   Name2 == other.Name2 &&
                   PriceMove == other.PriceMove;
        }

        /// <summary>
        /// Hash code built from the same fields <see cref="Equals(object)"/> compares, so that
        /// instances which compare equal also hash equal.
        /// </summary>
        /// <returns>A hash of the inherited and declared fields.</returns>
        public override int GetHashCode()
        {
            // Inherited fields are hashed here directly rather than through base.GetHashCode(),
            // so the result does not depend on how the base class implements hashing.
            unchecked
            {
                int hash = 17;
                hash = hash * 31 + (Subtype != null ? Subtype.GetHashCode() : 0);
                hash = hash * 31 + Timestamp.GetHashCode();
                hash = hash * 31 + (GroupName != null ? GroupName.GetHashCode() : 0);
                hash = hash * 31 + Volume1;
                hash = hash * 31 + (Name1 != null ? Name1.GetHashCode() : 0);
                hash = hash * 31 + Volume2;
                hash = hash * 31 + (Name2 != null ? Name2.GetHashCode() : 0);
                hash = hash * 31 + PriceMove.GetHashCode();
                return hash;
            }
        }
    }

    /// <summary>
    /// Produces randomized messages for the serialization test.
    /// </summary>
    public class MessageFactory
    {
        /// <summary>
        /// Builds a list of <paramref name="number"/> messages. Each one is, with equal
        /// probability, a <c>SubMessage1</c> or a <c>SubMessage2</c> filled with random
        /// and clock-derived values.
        /// </summary>
        /// <param name="number">How many messages to create.</param>
        /// <returns>The created messages, in creation order.</returns>
        public static IEnumerable<MessageBase> CreateMessages(int number)
        {
            Random rng = new Random();
            List<MessageBase> messages = new List<MessageBase>();

            for (int made = 0; made < number; made++)
            {
                // Coin flip picks the concrete message type.
                bool pickFirstKind = rng.Next(2) == 0;
                if (pickFirstKind)
                {
                    messages.Add(BuildSubMessage1(rng));
                }
                else
                {
                    messages.Add(BuildSubMessage2(rng));
                }
            }

            return messages;
        }

        // Creates one SubMessage1; initializers run top to bottom, which fixes the order of the random draws.
        private static SubMessage1 BuildSubMessage1(Random rng)
        {
            return new SubMessage1
            {
                Timestamp = DateTime.Now,
                GroupName = "Group" + DateTime.Now.Millisecond.ToString(),
                Subtype = "SubMessag1",
                Volume = rng.Next(10000),
                Count = rng.Next(100),
                Event = rng.Next(2) == 0 ? EventType.One : EventType.Two
            };
        }

        // Creates one SubMessage2; initializers run top to bottom, which fixes the order of the random draws.
        private static SubMessage2 BuildSubMessage2(Random rng)
        {
            return new SubMessage2
            {
                Timestamp = DateTime.Now,
                GroupName = "Group" + DateTime.Now.Millisecond.ToString(),
                Subtype = "SubMessag2",
                Volume1 = rng.Next(1000),
                PriceMove = rng.NextDouble() * 100 - 50,
                Volume2 = rng.Next(1000),
                Name1 = "Contract" + (DateTime.Now.Millisecond + rng.Next(5)).ToString(),
                Name2 = "Contract" + DateTime.Now.Millisecond.ToString()
            };
        }

    }

    /// <summary>
    /// Round-trips <paramref name="count"/> generated messages through an Avro serializer
    /// created for <c>MessageBase</c>, then prints whether every message survived, the
    /// accumulated serialize and deserialize stopwatch ticks, and the size of the output file.
    /// </summary>
    /// <param name="count">Number of messages to generate, write and read back.</param>
    public static void TestAvro(int count)
    {
        // One constant for the output file, so writing, reading and the size lookup
        // cannot drift apart (the size was previously read from a different file name).
        const string path = @"C:\test_avro.bin";

        Stopwatch sw = new Stopwatch();

        var serializer = Microsoft.Hadoop.Avro.AvroSerializer.Create<MessageBase>();
        MessageBase[] messages = new MessageBase[count];
        using (var file = File.Create(path))
        {
            int i = 0;
            foreach (var message in MessageFactory.CreateMessages(count))
            {
                // Keep the original so it can be compared after deserialization.
                messages[i++] = message;

                // Only the Serialize call itself is timed.
                sw.Start();
                serializer.Serialize(file, message);
                sw.Stop();
            }
        }

        long serTicks = sw.ElapsedTicks;
        sw.Reset();

        List<int> badMessages = new List<int>();
        using (var file = File.OpenRead(path))
        {
            for (int i = 0; i < count; i++)
            {
                // Only the Deserialize call itself is timed.
                sw.Start();
                MessageBase message = serializer.Deserialize(file);
                sw.Stop();

                // Diagnostic only: shows whether a derived type came back.
                SubMessage1 m1 = message as SubMessage1;
                SubMessage2 m2 = message as SubMessage2;

                bool areNull = (m1 == null) && (m2 == null);   // Always true

                if (!messages[i].Equals(message))
                {
                    badMessages.Add(i);
                }
            }
        }

        long deserTicks = sw.ElapsedTicks;
        bool correct = badMessages.Count == 0;

        long size = new FileInfo(path).Length;

        Console.WriteLine(String.Format("Correct: {0}, Time Out: {1}, , Time In: {2}, , Size: {3}", correct, serTicks, deserTicks, size));
    }

    /// <summary>
    /// Entry point: runs the Avro round-trip test once, then waits for a line of
    /// console input so the output stays visible.
    /// </summary>
    static void Main(string[] args)
    {
        const int messageCount = 10000;

        TestAvro(messageCount);
        Console.ReadLine();
    }
}

Upvotes: 0

Views: 247

Answers (1)

Greg Graham
Greg Graham

Reputation: 474

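My bad - I forgot the [KnownType] attributes on the base class; it needs one for each derived type, i.e. [KnownType(typeof(SubMessage1))] and [KnownType(typeof(SubMessage2))] on MessageBase. It works if you include the attributes.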

Upvotes: 1

Related Questions