Hesk
Hesk

Reputation: 327

Java8: Filter and compare 2 Lists with Lambda

The task:

I have 2 lists which contain entries (ID + DateTime). The same ID can occur multiple times with different DateTimes.

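I need a list of IDs from list1 that meet at least one of the following conditions: the ID does not exist in list2; the ID exists in list2 but has a later date in list1; or the ID occurs more often in list1 than in list2.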

The question: How can I do this with Java 8 Streams?

ExampleCode:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;

/**
 * Small demo that compares two lists of (id, date) entries.
 *
 * <p>It prints both lists, the latest date per id for each list, and finally the set of ids
 * from list1 that are missing in list2, have a later date than in list2, or occur more often
 * in list1 than in list2.
 */
public class LambdaFilter
{
    public static void main(final String[] args)
    {
        final LambdaFilter lf = new LambdaFilter();
        lf.start();
    }

    /**
     * Builds the two sample lists, prints the intermediate maps and prints the resulting id set.
     */
    private void start()
    {
        final List<Entry> list1 = Arrays.asList(
                                                new Entry(15, new DateTime(2012, 6, 29, 0, 0, 0, 0)),
                                                new Entry(101, new DateTime(2012, 3, 12, 0, 0, 0, 0)),
                                                new Entry(101, new DateTime(2012, 3, 12, 0, 0, 0, 0)),
                                                new Entry(68691, new DateTime(2015, 2, 12, 0, 0, 0, 0)),
                                                new Entry(68691, new DateTime(2015, 2, 12, 0, 0, 0, 0)),
                                                new Entry(68691, new DateTime(2015, 5, 1, 0, 0, 0, 0)),
                                                new Entry(70738, new DateTime(2016, 1, 26, 0, 0, 0, 0)));
        final List<Entry> list2 = Arrays.asList(
                                                new Entry(15, new DateTime(2012, 6, 29, 0, 0, 0, 0)),
                                                new Entry(101, new DateTime(2012, 3, 12, 0, 0, 0, 0)),
                                                new Entry(68691, new DateTime(2015, 2, 12, 0, 0, 0, 0)),
                                                new Entry(68691, new DateTime(2015, 2, 12, 0, 0, 0, 0)),
                                                new Entry(70738, new DateTime(2015, 7, 30, 0, 0, 0, 0)));

        System.out.println(list1);
        System.out.println(list2);

        // MAIN GOAL: get the ids from list1 which have a later date than in list2, do not exist
        // in list2 at all, or occur more often in list1 than in list2.

        // Reduce each list so that every id is unique (mapped to its latest date).
        final Map<Integer, DateTime> list1UniqueIdMap = latestDateById(list1);
        final Map<Integer, DateTime> list2UniqueIdMap = latestDateById(list2);

        System.out.println(list1UniqueIdMap);
        System.out.println(list2UniqueIdMap);

        // Count the occurrences once per list instead of rescanning both lists for every id.
        final Map<Integer, Integer> list1Counts = countById(list1);
        final Map<Integer, Integer> list2Counts = countById(list2);

        final Set<Integer> resultSet = new HashSet<Integer>();
        for (final Map.Entry<Integer, DateTime> latest : list1UniqueIdMap.entrySet())
        {
            final Integer id = latest.getKey();

            final boolean missingInList2 = !list2UniqueIdMap.containsKey(id);
            final boolean newerThanList2 = !missingInList2 && latest.getValue().isAfter(list2UniqueIdMap.get(id));
            // Every id iterated here comes from list1, so list1Counts always has an entry for it.
            final boolean moreFrequent = list1Counts.get(id) > list2Counts.getOrDefault(id, 0);

            if (missingInList2 || newerThanList2 || moreFrequent)
            {
                resultSet.add(id);
            }
        }

        // Result
        System.out.println(resultSet);
    }

    /**
     * Maps every id occurring in {@code list} to the latest date found for that id.
     *
     * @param list the entries to reduce; the entries built in this class all carry a non-null date
     * @return a new map from id to the latest date of that id
     */
    private static Map<Integer, DateTime> latestDateById(final List<Entry> list)
    {
        final Map<Integer, DateTime> latest = new HashMap<Integer, DateTime>();
        for (final Entry e : list)
        {
            // Keep the stored date unless the candidate is strictly later.
            latest.merge(e.getId(), e.getDate(),
                         (current, candidate) -> candidate.isAfter(current) ? candidate : current);
        }
        return latest;
    }

    /**
     * Counts how many entries of {@code list} exist per id.
     *
     * @param list the entries to count
     * @return a new map from id to its number of occurrences in {@code list}
     */
    private static Map<Integer, Integer> countById(final List<Entry> list)
    {
        final Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
        for (final Entry e : list)
        {
            counts.merge(e.getId(), 1, Integer::sum);
        }
        return counts;
    }

    /**
     * Immutable (id, date) pair. Declared static because it never uses the enclosing instance.
     */
    private static final class Entry
    {
        private final int id;
        private final DateTime date;

        public Entry(final int id, final DateTime date)
        {
            this.id = id;
            this.date = date;
        }

        public int getId()
        {
            return id;
        }

        public DateTime getDate()
        {
            return date;
        }

        /**
         * @return the date printed with the pattern {@code dd.MM.yyyy}
         */
        public String getFormattedDate()
        {
            return DateTimeFormat.forPattern("dd.MM.yyyy").print(getDate());
        }

        @Override
        public String toString()
        {
            return this.getClass().getSimpleName() + "[id: " + this.getId() + " , date: " + this.getFormattedDate() + "]";
        }

    }
}

Output of my Example:

List1
[
Entry[id: 15 , date: 29.06.2012], 
Entry[id: 101 , date: 12.03.2012], 
Entry[id: 101 , date: 12.03.2012],   
Entry[id: 68691 , date: 12.02.2015],   
Entry[id: 68691 , date: 12.02.2015],   
Entry[id: 68691 , date: 01.05.2015],   
Entry[id: 70738 , date: 26.01.2016]]

List2:  
[
Entry[id: 15 , date: 29.06.2012],  
Entry[id: 101 , date: 12.03.2012],  
Entry[id: 68691 , date: 12.02.2015],   
Entry[id: 68691 , date: 12.02.2015],   
Entry[id: 70738 , date: 30.07.2015]]

List1UniqueIdMap:  
{
101=2012-03-12T00:00:00.000+01:00,
70738=2016-01-26T00:00:00.000+01:00,     
68691=2015-05-01T00:00:00.000+02:00,       
15=2012-06-29T00:00:00.000+02:00}

List2UniqueIdMap:  
{
101=2012-03-12T00:00:00.000+01:00,
70738=2015-07-30T00:00:00.000+02:00,     
68691=2015-02-12T00:00:00.000+01:00,     
15=2012-06-29T00:00:00.000+02:00}

Result:  
[101, 68691, 70738]

Upvotes: 4

Views: 6035

Answers (1)

Tunaki
Tunaki

Reputation: 137064

First, what you want is to create an intermediate Map<Integer, DateTime> from list2 where each entry's id is mapped to the maximum date. This way, we will just have to compare each id from list1 to this maximum date to see if it is after or not.

To account for your update, where you also need to keep ids that occur more often in list1 than in list2, we also need to create two more maps of type Map<Integer, Long> that store the count of each id for list1 and list2.

Creating the date map can be done by grouping list2 by the entry's id. We are using groupingBy(classifier, downstream) with the classifier being the method-reference Entry::getId returning the id of the entry. The downstream collector is used to collect all values having the same id into a single result; in this case, we are using the maxBy collector that compares the entries by date with comparing(keyExtractor). Since this collector returns an Optional (to handle the case where there would be nothing to collect, and therefore no maximum value), it is wrapped into collectingAndThen, which applies a finisher operation that, in this case, gets the optional value and retrieves the date from it. The idea is the same for the count maps; the difference is that this time the downstream collector is counting(), which counts the number of values having the same key.

// Latest date per id in list2. The variable is named mapDate because the filter step
// below refers to it by that name.
Map<Integer, DateTime> mapDate =
    list2.stream()
         .collect(groupingBy(
             Entry::getId,
             // groupingBy only creates a group for an id that has at least one entry, so the
             // Optional produced by maxBy is never empty here and get() is safe.
             collectingAndThen(maxBy(comparing(Entry::getDate)), e -> e.get().getDate())
         ));

// Number of entries per id, for each list.
Map<Integer, Long> mapCount2 = list2.stream().collect(groupingBy(Entry::getId, counting()));
Map<Integer, Long> mapCount1 = list1.stream().collect(groupingBy(Entry::getId, counting()));

Having this intermediate map, we can then easily filter the list1: we only keep elements for which the map does not contain the current id, or if it does, the current entry's date is after the one stored in the map. Since we're not interested in duplicates, this is collected into a Set.

// Keep an id of list1 when list2 does not know it, when this entry is dated after list2's
// latest date for it, or when it occurs more often in list1 than in list2.
// Collecting into a Set drops the duplicates.
Set<Integer> ids =
    list1.stream()
         .filter(e -> {
             final Integer id = e.getId();
             if (!mapDate.containsKey(id)) {
                 return true;
             }
             if (e.getDate().isAfter(mapDate.get(id))) {
                 return true;
             }
             // Only reached for ids present in list2, so both count lookups are non-null.
             return mapCount1.get(id) > mapCount2.get(id);
         })
         .map(Entry::getId)
         .collect(toSet());

Static imports used to make the code cleaner:

import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.counting;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.maxBy;
import static java.util.stream.Collectors.toSet;

Upvotes: 3

Related Questions