deHaar
deHaar

Reputation: 18568

Junit test fails after exchanging implementation with stream API, why?

I have implemented the following method that provides an overview over Strings and their occurrences in the values of a Map<String, List<String>>:

public static Map<String, Long> getValueItemOccurrences(Map<String, List<String>> map) {
    Map<String, Long> occurrencesOfValueItems = new HashMap<>();

    map.forEach((key, value) -> {
        value.forEach(item -> {
            if (occurrencesOfValueItems.containsKey(item)) {
                occurrencesOfValueItems.put(item, occurrencesOfValueItems.get(item) + 1);
            } else {
                occurrencesOfValueItems.put(item, 1L);
            }
        });
    });

    return occurrencesOfValueItems;
}

I have tested it with a single JUnit test and the test succeeds. Here it is (now also including the imports):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

class TryoutTest {

    static Map<String, List<String>> items = new HashMap<>();
    static List<String> largeList = new ArrayList<String>();
    static List<String> mediumList = new ArrayList<String>();       
    static List<String> smallList = new ArrayList<String>();
    static List<String> differentLargeList = new ArrayList<String>();
    static List<String> differentSmallList = new ArrayList<String>();
    static List<String> anotherList = new ArrayList<String>();
    static List<String> someList = new ArrayList<String>();
    static List<String> justAList = new ArrayList<String>();

    @BeforeAll
    static void setup() {
        largeList.add("Alfred");
        largeList.add("Bakari");
        largeList.add("Christian");
        largeList.add("Dong");
        largeList.add("Etienne");
        largeList.add("Francesco");
        largeList.add("Guido");
        largeList.add("Henrik");
        largeList.add("Ivan");
        largeList.add("Jos");
        largeList.add("Kumar");
        largeList.add("Leonard");
        largeList.add("Marcin");
        largeList.add("Nico");
        largeList.add("Olof");
        items.put("fifteen-01", largeList);

        mediumList.add("Petar");
        mediumList.add("Quentin");
        mediumList.add("Renato");
        mediumList.add("Sadio");
        mediumList.add("Tomislav");
        mediumList.add("Ulrich");
        mediumList.add("Volkan");
        mediumList.add("Wladimir");
        items.put("eight-01", mediumList);

        smallList.add("Xavier");
        smallList.add("Yves");
        smallList.add("Zinedine");
        smallList.add("Alfred");
        items.put("four-01", smallList);

        differentLargeList.add("Bakari");
        differentLargeList.add("Christian");
        differentLargeList.add("Dong");
        differentLargeList.add("Etienne");
        differentLargeList.add("Francesco");
        differentLargeList.add("Xavier");
        differentLargeList.add("Yves");
        differentLargeList.add("Wladimir");
        differentLargeList.add("Jens");
        differentLargeList.add("Hong");
        differentLargeList.add("Le");
        differentLargeList.add("Leigh");
        differentLargeList.add("Manfred");
        differentLargeList.add("Anders");
        differentLargeList.add("Rafal");
        items.put("fifteen-02", differentLargeList);

        differentSmallList.add("Dario");
        differentSmallList.add("Mohammad");
        differentSmallList.add("Abdul");
        differentSmallList.add("Alfred");
        items.put("four-02", differentSmallList);

        anotherList.add("Kenneth");
        anotherList.add("Hong");
        anotherList.add("Bakari");
        anotherList.add("Ulrich");
        anotherList.add("Henrik");
        anotherList.add("Bernd");
        anotherList.add("Samuel");
        anotherList.add("Ibrahim");
        items.put("eight-02", anotherList);

        someList.add("Kumar");
        someList.add("Konrad");
        someList.add("Bakari");
        someList.add("Francesco");
        someList.add("Leigh");
        someList.add("Yves");
        items.put("six-01", someList);

        justAList.add("Bakari");
        items.put("one-01", justAList);
    }

    @Test
    void valueOccurrencesTest() {
        Map<String, Integer> expected = new HashMap<>();
        expected.put("Abdul", 1);
        expected.put("Alfred", 3);
        expected.put("Anders", 1);
        expected.put("Bakari", 5);
        expected.put("Bernd", 1);
        expected.put("Christian", 2);
        expected.put("Dario", 1);
        expected.put("Dong", 2);
        expected.put("Etienne", 2);
        expected.put("Francesco", 3);
        expected.put("Guido", 1);
        expected.put("Henrik", 2);
        expected.put("Hong", 2);
        expected.put("Ibrahim", 1);
        expected.put("Ivan", 1);
        expected.put("Jens", 1);
        expected.put("Jos", 1);
        expected.put("Kenneth", 1);
        expected.put("Konrad", 1);
        expected.put("Kumar", 2);
        expected.put("Le", 1);
        expected.put("Leigh", 2);
        expected.put("Leonard", 1);
        expected.put("Manfred", 1);
        expected.put("Marcin", 1);
        expected.put("Mohammad", 1);
        expected.put("Nico", 1);
        expected.put("Olof", 1);
        expected.put("Petar", 1);
        expected.put("Quentin", 1);
        expected.put("Rafal", 1);
        expected.put("Renato", 1);
        expected.put("Sadio", 1);
        expected.put("Samuel", 1);
        expected.put("Tomislav", 1);
        expected.put("Ulrich", 2);
        expected.put("Volkan", 1);
        expected.put("Wladimir", 2);
        expected.put("Xavier", 2);
        expected.put("Yves", 3);
        expected.put("Zinedine", 1);
        assertThat(FunctionalMain.getValueItemOccurrences(items), is(expected));
    }
}

When I change the implementation of the method to

public static Map<String, Long> getValueItemOccurrences(Map<String, List<String>> map) {
    return map.values().stream()
            .flatMap(Collection::stream)
            .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
}

the test case fails, stating that the resulting map is not equal to the expected one. See this eclipse screenshot, which shows that, obviously, the order of elements makes the test fail:

enter image description here

Is it really just that? I think I have read that HashMaps generally don't guarantee any order of keys.

My (quite long) question is: What can I do to make the stream API call produce a result that passes the test or do I have to change the test case, maybe use a different assertion?

Some sub-questions are:

Upvotes: 6

Views: 882

Answers (2)

Kevin Welker
Kevin Welker

Reputation: 7937

I highly recommend you start using AssertJ instead of JUnit's built-in assertions. Doing that, you could have used the following AssertJ assertion:

assertThat(FunctionalMain.getValueItemOccurrences(items))containsOnly(expected);

containsOnly() checks that your maps have exactly the same elements in any order

Besides that advantage, AssertJ's assertThat() also utilizes fluid syntax (unlike JUnit's built-in assertThat), so that your IDE can give you context sensitive help to let you know which of AssertJ's hundreds of type-specific assertions are available for your tested value.

Upvotes: 1

Eugene
Eugene

Reputation: 120848

TL;DR your test is broken, fix that.

First of all this is more easy to re-produce with:

List<String> list = ImmutableList.of("Kumar", "Kumar", "Jens");

public static Map<String, Long> getValueItemOccurrences1(List<String> list) {
    return list
        .stream()
        .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
}

public static Map<String, Long> getValueItemOccurrences2(List<String> list) {
    Map<String, Long> occurrencesOfValueItems = new HashMap<>();

    list.forEach(item -> {
        if (occurrencesOfValueItems.containsKey(item)) {
            occurrencesOfValueItems.put(item, occurrencesOfValueItems.get(item) + 1);
        } else {
            occurrencesOfValueItems.put(item, 1L);
        }
    });

    return occurrencesOfValueItems;
}

The problem is that after the internal HashMap::hash (also called re-hash) and getting the last bits that actually matter when deciding which bucket to choose, they have the same values:

    System.out.println(hash("Kumar".hashCode()) & 15);
    System.out.println(hash("Jens".hashCode()) & 15);

In simpler words, a HashMap decides where to put an entry (bucket is chosen) based on the hashCode of your entries. Well, almost, once the hashCode is computed, internally there is another hash done - to better disperse entries. That final int value of the hashCode is used to decide the bucket. When you create a HashMap with a default capacity of 16 (via new HashMap for example), only the last 4 bits matter where an entry will go (that is why I did the & 15 there - to see the last 4 bits).

where hash is :

// xor first 16 and last 16 bits
static final int hash(Object key) {
    int h;
    return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
}

Now, it turns out that ["Kumar" and "Jens"] or ["Xavier", "Kenneth", "Samuel"] have the same last 4 digits after the algorithm above is applied (3 in the first case and 1 in the second case).

Now that we know this info, this actually can be simplified even further:

Map<String, Long> map = new HashMap<>();
map.put("Kumar", 2L);
map.put("Jens", 1L);

System.out.println(map); // {Kumar=2, Jens=1}

map = new HashMap<>();
map.computeIfAbsent("Kumar", x -> 2L);
map.computeIfAbsent("Jens", x -> 1L);
System.out.println(map); // {Jens=1, Kumar=2}

I've used map.computeIfAbsent because this is what Collectors.groupingBy is using under the hood.


It turns out that put and computeIfAbsent, put elements in the HashMap using a different way; this is totally allowed as a Map does not have any order anyway - and these elements end up in the same bucket anyway, which is the import part. So test your code, key by key, the previous testing code was broken.


This is even funner reading if you want:

HashMap::put will add elements in a Linked fashion (until Tree entries are created), so if you have one element existing, all others will be added like:

one --> next --> next ... so on.

elements are appended to the end of this queue as they come in to the put method.

On the other hand computeIfAbsent is a bit different, it adds elements to the beginning of the queue. If we take the example above, first Xavier is added. Then, when Kenneth is added, becoming the first:

 Kenneth -> Xavier // Xavier was "first"

When Samuel is added, it becomes the first:

 Samuel -> [Kenneth -> Xavier] 

Upvotes: 6

Related Questions