JDK
JDK

Reputation: 125

Merging data in a list based on date

I have a generic list which is using this class

/// <summary>
/// One row of the list being merged: a date plus two integer counters.
/// </summary>
public class Data
{
    /// <summary>Date of the row, kept as a string; used as the merge key.</summary>
    public string Date { get; set; }

    /// <summary>Counter that is summed per date when rows are merged (presumably the number of successful records).</summary>
    public int OkRecords { get; set; }

    /// <summary>Counter that is summed per date when rows are merged (presumably the number of failed records).</summary>
    public int ErrorRecords { get; set; }
}

I also have a List<string> which holds the unique dates. I need to merge the data of the original list so that there is only one record per date, i.e. if there are 10 unique dates then the merged list should contain exactly 10 records, each holding the summed values for its date. I have implemented the following logic, but it takes far too much time when the original list has more than 100,000 records.

// Builds one merged Data record per entry of distinctDates and appends it to DataListCombined.
int distinctDatesCount = distinctDates.Count;

for (int i = 0; i < distinctDatesCount; i++)
{
    string date = distinctDates[i];
    int ok = 0, error = 0;
    // Where() walks the whole of dataList on every outer iteration, so the total
    // work grows with (number of dates) x (number of records).
    foreach (var item in dataList.Where(w => w.Date == date))
    {
        ok += item.OkRecords;
        error += item.ErrorRecords;
    }
    // A record is added even when no item matched the date; its counters are then 0.
    Data dataValues = new Data
    {
        Date = date,
        OkRecords = ok,
        ErrorRecords = error
    };
    DataListCombined.Add(dataValues);
}

DataListCombined is the list in which I am storing the newly merged data.

I've stored the date in string format as I am using it elsewhere where it is required in string format.

Upvotes: 2

Views: 796

Answers (2)

GERAUD Fabien
GERAUD Fabien

Reputation: 31

Personally, I would use a group join, because records in the dataList whose date is not in the list of distinct dates shouldn't be in the final result.

/// <summary>
/// One row of the list being merged: a date plus two integer counters.
/// </summary>
public class Data
{
    /// <summary>Date of the row, kept as a string; used as the join/merge key.</summary>
    public string Date { get; set; }

    /// <summary>Counter that is summed per date when rows are merged (presumably the number of successful records).</summary>
    public int OkRecords { get; set; }

    /// <summary>Counter that is summed per date when rows are merged (presumably the number of failed records).</summary>
    public int ErrorRecords { get; set; }
}

/// <summary>
/// Small console benchmark comparing two ways of merging a list of
/// <c>Data</c> rows into one summed row per distinct date:
/// a nested scan (<see cref="Method1"/>) and a LINQ group join (<see cref="Method2"/>).
/// </summary>
class Program
{
    /// <summary>
    /// Generates random test data, times both merge methods and prints the
    /// elapsed time and the number of merged records for each.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Random rand = new Random();

        // The date list has to be genuinely distinct. Without de-duplication both
        // methods would emit several records for the same date and the timings
        // would be dominated by repeated work.
        HashSet<string> seenDates = new HashSet<string>();
        List<string> distinctDates = new List<string>();
        for (int i = 0; i < 10000; i++)
        {
            string date = RandomDate(rand);
            if (seenDates.Add(date))
            {
                distinctDates.Add(date);
            }
        }

        List<Data> dataList = new List<Data>(10000);
        for (int i = 0; i < 10000; i++)
        {
            dataList.Add(new Data { Date = RandomDate(rand), OkRecords = 0, ErrorRecords = 1 });
        }

        Stopwatch watch = Stopwatch.StartNew();
        List<Data> nestedScanResult = Method1(distinctDates, dataList);
        watch.Stop();
        Console.WriteLine("{0} ({1} records)", watch.Elapsed, nestedScanResult.Count);

        watch.Restart();
        List<Data> groupJoinResult = Method2(distinctDates, dataList);
        watch.Stop();
        Console.WriteLine("{0} ({1} records)", watch.Elapsed, groupJoinResult.Count);

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }

    /// <summary>
    /// Builds a random "month/day/1" date string.
    /// </summary>
    /// <param name="rand">Random number source.</param>
    /// <returns>A date string with month 1-12 and day 1-28.</returns>
    private static string RandomDate(Random rand)
    {
        // The upper bound of Random.Next is exclusive, so 13 is needed to include
        // December. Days stop at 28 so that every generated date exists in every month.
        return rand.Next(1, 13) + "/" + rand.Next(1, 29) + "/1";
    }

    /// <summary>
    /// Baseline from the question: for every date, scans the whole of
    /// <paramref name="dataList"/> and sums the matching rows.
    /// </summary>
    /// <param name="distinctDates">Dates to produce one merged record for.</param>
    /// <param name="dataList">Rows to merge.</param>
    /// <returns>One record per entry of <paramref name="distinctDates"/>, in the same order.</returns>
    private static List<Data> Method1(List<string> distinctDates, List<Data> dataList)
    {
        List<Data> DataListCombined = new List<Data>();
        int distinctDatesCount = distinctDates.Count;
        for (int i = 0; i < distinctDatesCount; i++)
        {
            string date = distinctDates[i];
            int ok = 0, error = 0;

            // Deliberately left as a full scan per date: this is the slow
            // approach the benchmark is measuring against.
            foreach (var item in dataList.Where(w => w.Date == date))
            {
                ok += item.OkRecords;
                error += item.ErrorRecords;
            }
            Data dataValues = new Data
            {
                Date = date,
                OkRecords = ok,
                ErrorRecords = error
            };
            DataListCombined.Add(dataValues);
        }
        return DataListCombined;
    }

    /// <summary>
    /// Group-join variant: correlates each date with the rows of
    /// <paramref name="dataList"/> that carry the same date and sums them.
    /// Rows whose date is not in <paramref name="distinctDates"/> are ignored.
    /// </summary>
    /// <param name="distinctDates">Dates to produce one merged record for (outer sequence).</param>
    /// <param name="dataList">Rows to merge (inner sequence).</param>
    /// <returns>One record per entry of <paramref name="distinctDates"/>, in the same order.</returns>
    private static List<Data> Method2(List<string> distinctDates, List<Data> dataList)
    {
        return distinctDates.GroupJoin(
            dataList,
            distinctDate => distinctDate,
            dataItem => dataItem.Date,
            // First argument is the outer element (the date), second is the
            // group of inner elements (data rows) that share that date.
            (distinctDate, matchingItems) => new Data
            {
                Date = distinctDate,
                OkRecords = matchingItems.Sum(item => item.OkRecords),
                ErrorRecords = matchingItems.Sum(item => item.ErrorRecords)
            }
            ).ToList();
    }
}

Upvotes: 1

Nkosi
Nkosi

Reputation: 247098

You can use LINQ's GroupBy to group the data by date and then use Sum to add up the counts into a single object per date.

// Hash the wanted dates once so each membership test is a hash lookup
// instead of a linear scan of the distinctDates list.
var wantedDates = new HashSet<string>(distinctDates);

// Filter before grouping so that rows for unwanted dates are never aggregated,
// then produce one summed Data record per remaining date.
var DataListCombined = dataList.Where(data => wantedDates.Contains(data.Date))
                           .GroupBy(data => data.Date)
                           .Select(groupedData => 
                               new Data {
                                   Date = groupedData.Key,
                                   OkRecords = groupedData.Sum(item => item.OkRecords),
                                   ErrorRecords = groupedData.Sum(item => item.ErrorRecords)
                               })
                            .ToList();

You can also check this resource for more examples.

101 LINQ Samples

Upvotes: 1

Related Questions