seinecle
seinecle

Reputation: 10788

FastUtil - operation on Set fails with Long type

Minimum reproducible example below, with the dependency:

<dependency>
    <groupId>it.unimi.dsi</groupId>
    <artifactId>fastutil</artifactId>
    <version>8.5.12</version>
</dependency>

The issue with the code below is that it works with short numerical values, and it doesn't with bigger values 😕.

package net.clementlevallois.functions.mapsofscience;

import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ReferenceOpenHashSet;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 *
 * @author LEVALLOIS
 */
public class JournalSimilaritiesComputer {

    static Long2ObjectOpenHashMap<ReferenceOpenHashSet<Long>> journal2AuthorsMap = new Long2ObjectOpenHashMap<>();

    public static void main(String[] args) throws IOException {
        JournalSimilaritiesComputer computer = new JournalSimilaritiesComputer();
        computer.loadDataToMap();
        System.out.println("number of entries in the map: " + journal2AuthorsMap.size());
        computer.doubleLoopingThroughJournalIds();
    }

    private void loadDataToMap() throws IOException {
        
        /*
        The goal is to:
        - load lines
        - split them on |
        - the left side is an entity, the right side is thing to measure similarity on.
        - in the examples below, we would expect 2 lines to be similar because they have the same value on the right side
        - but weirdly, the second example fails to detect the similarity. Help!
        */
        
        List<String> lines = List.of("1|0","2|0"); // DETECTION OF SIMILARITY WORKS
        lines = List.of("4210168784|3089954349","198793727|3089954349"); // DETECTION OF SIMILARITY FAILS
        
        lines.stream().forEach(line -> {
            processLine(line);
        });
    }

    private void doubleLoopingThroughJournalIds() {        
        journal2AuthorsMap.keySet().longStream().forEach(journalIdA -> {
                    ReferenceOpenHashSet<Long> authorsOfJournalA = journal2AuthorsMap.get(journalIdA);

            journal2AuthorsMap.keySet().longStream().forEach(journalIdB -> {
                if (journalIdA == journalIdB) {
                    return;
                }
                ReferenceOpenHashSet<Long> authorsOfJournalB = journal2AuthorsMap.get(journalIdB);
                Integer similarity = computeSimilarities(authorsOfJournalA, authorsOfJournalB);
                if (similarity > 0) {
                    String sim = journalIdA + "," + journalIdB + "," + similarity;
                    System.out.println("similarity detected! -> " + sim);
                }
            });
        });
    }

    private Integer computeSimilarities(ReferenceOpenHashSet<Long> authorsOfJournalA, ReferenceOpenHashSet<Long> authorsOfJournalB) {
        
        
        for (Long entry: authorsOfJournalA){
            if (authorsOfJournalB.contains(entry)){
                System.out.println("stop"); // never reached with example with long values
                System.out.println("stop"); 
            }
        }
        Set<Long> commonElements = authorsOfJournalA.parallelStream()
                .filter(authorsOfJournalB::contains)
                .collect(Collectors.toSet());
        return commonElements.size(); // is zero with example with long values, should be equal to 1!
    }

    private void processLine(String line) {
        String fields[] = line.split("\\|");
        if (fields.length < 2) {
            return;
        }
        String journalId = fields[0];
        long journalIdAsLong = Long.parseLong(journalId);
        String authorIdsAsLine = fields[1];
        String authorIds[] = authorIdsAsLine.split(",");
        ReferenceOpenHashSet<Long> setOfCurrentAuthors = new ReferenceOpenHashSet();
        ReferenceOpenHashSet<Long> setOfAuthorsForThisJournal = journal2AuthorsMap.getOrDefault(journalIdAsLong, setOfCurrentAuthors);
        for (String authorId : authorIds) {
            try {
                long authorIdLong = Long.parseLong(authorId);
                setOfAuthorsForThisJournal.add(authorIdLong);
            } catch (NumberFormatException e) {
                System.out.println("error with author id, not  long: " + authorId);
            }
        }
        journal2AuthorsMap.put(journalIdAsLong, setOfAuthorsForThisJournal);
    }

}

Upvotes: 0

Views: 275

Answers (0)

Related Questions