Reputation: 61
I am using following class to serialize and deserialize my classes.
import java.io.*;
public final class Serialization {
public static void writeObject(Object obj, String path){
try (ObjectOutputStream oos =
new ObjectOutputStream(new FileOutputStream(path))) {
oos.writeObject(obj);
//System.out.println("Done");
} catch (Exception ex) {
ex.printStackTrace();
}
}
public static Object readObject(String path){
Object obj = null;
FileInputStream fin = null;
ObjectInputStream ois = null;
try {
fin = new FileInputStream(path);
ois = new ObjectInputStream(fin);
obj = ois.readObject();
} catch (Exception ex) {
ex.printStackTrace();
} finally {
if (fin != null) {
try {
fin.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (ois != null) {
try {
ois.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return obj;
}
}
I have a class that implements the Serializable
interface: TextCategorizator. I am trying to use this class as a classification model. So, to serialize this class' object, I use
TextCategorizator tc = new TextCategorizator(trainingFiles, vecFile);
Serialization.writeObject(tc, MODEL_PATH);
And then when I try to read this serialized object with
TextCategorizator model = (TextCategorizator) Serialization.readObject(MODEL_PATH);
I got the following exception trace:
java.io.OptionalDataException
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1373)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
at java.util.HashMap.readObject(HashMap.java:1402)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1058)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1909)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
at utils.Serialization.readObject(Serialization.java:27)
at Main.main(Main.java:33)
The part that raises the exception is:
obj = ois.readObject();
When I look at reference page of this exception, it says there are two options which indicated with the eof flag and length variable in this exception. I printed them to see. eof is true, length is 0. This means, according to reference page,
An attempt was made to read past the end of data consumable by a class-defined readObject or readExternal method. In this case, the OptionalDataException's eof field is set to true, and the length field is set to 0.
I used these methods before and I did not face with this exception. What's wrong and what exactly the "read past" means?
EDIT : TextCategorizator class is here:
import utils.FileUtils;
import java.io.File;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
public class TextCategorizator implements Serializable {
private Map<String, String> wordVectors;
private Map<File, List<List<Double>>> docVectors;
private Map<File, String> trainingFiles;
private Set<String> classes;
public TextCategorizator(Map<File, String> trainingFiles, String trainedVectors) {
wordVectors = new HashMap<>();
docVectors = new HashMap<>();
classes = new HashSet<>();
this.trainingFiles = trainingFiles;
List<String> lines = FileUtils.readFileAsList(new File(trainedVectors));
System.out.println("> Reading word vector file.");
lines.parallelStream().forEach(line -> {
String name = line.substring(0, line.indexOf(' '));
wordVectors.put(name, line);
});
train(trainingFiles);
}
private void train(Map<File, String> trainingFiles) {
System.out.println("> Starting training parallel.");
trainingFiles.entrySet().parallelStream().forEach(entry -> {
docVectors.put(entry.getKey(), getVectorsOfDoc(entry.getKey()));
classes.add(entry.getValue());
});
}
private List<List<Double>> getVectorsOfDoc(File doc) {
List<List<Double>> lists = new ArrayList<>();
List<Double> resultVecAvg = new ArrayList<>();
List<Double> resultVecMax = new ArrayList<>();
List<Double> resultVecMin = new ArrayList<>();
int vecSize = 100;
for (int i = 0; i < vecSize; i++) {
resultVecAvg.add(0.0);
resultVecMax.add(0.0);
resultVecMin.add(0.0);
}
String[] words = FileUtils.readWords(doc);
for (String word : words) {
String line = wordVectors.get(word);
if (line != null) {
List<Double> vec = new ArrayList<>();
String[] tokens = line.split(" ");
for (int i = 1; i < tokens.length; i++) {
vec.add(Double.parseDouble(tokens[i]));
}
for (int i = 0; i < vec.size(); i++) {
resultVecAvg.set(i, resultVecAvg.get(i) + (vec.get(i) / vecSize));
resultVecMax.set(i, Math.max(resultVecMax.get(i), vec.get(i)));
resultVecMin.set(i, Math.min(resultVecMin.get(i), vec.get(i)));
}
}
}
lists.add(resultVecAvg); lists.add(resultVecMax); lists.add(resultVecMin);
return lists;
}
private void getCosineSimilarities(List<Double> givenVec, int option, Map<File, Double> distances) {
for (Map.Entry<File, List<List<Double>>> entry : docVectors.entrySet()) {
List<Double> vec = null;
if (option == 1) // AVG
vec = entry.getValue().get(0);
else if (option == 2) // MAX
vec = entry.getValue().get(1);
else if (option == 3) // MIN
vec = entry.getValue().get(2);
distances.put(entry.getKey(), cosSimilarity(givenVec, vec));
}
}
private double cosSimilarity(List<Double> vec1, List<Double> vec2) {
double norm1 = 0.0;
double norm2 = 0.0;
double dotProduct = 0.0;
for (int i = 0; i < vec1.size(); i++) {
norm1 += Math.pow(vec1.get(i), 2);
norm2 += Math.pow(vec2.get(i), 2);
dotProduct += vec1.get(i) * vec2.get(i);
}
return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
}
// from http://stackoverflow.com/questions/109383/sort-a-mapkey-value-by-values-java
private <K, V extends Comparable<? super V>> Map<K, V>
sortByValue(Map<K, V> map, boolean reverse) {
return map.entrySet()
.stream()
.sorted((reverse ?
Map.Entry.comparingByValue(Collections.reverseOrder()) :
Map.Entry.comparingByValue()))
.collect(Collectors.toMap(
Map.Entry::getKey,
Map.Entry::getValue,
(e1, e2) -> e1,
LinkedHashMap::new
));
}
private int countClass(List<File> files, String c) {
int counter = 0;
for (File file : files) {
if (trainingFiles.get(file).equals(c))
++counter;
}
return counter;
}
public Map.Entry<String, Integer> classifyKnn(File file, int k, int option) {
List<List<Double>> vecs = getVectorsOfDoc(file);
List<Double> vec = getProperVector(vecs, option);
Map<File, Double> distances = new HashMap<>();
getCosineSimilarities(vec, option, distances);
distances = sortByValue(distances, true);
List<File> sortedFiles = new ArrayList<>(distances.keySet());
sortedFiles = sortedFiles.subList(0, k);
Map<String, Integer> counts = new HashMap<>();
for (String category : classes) {
counts.put(category, countClass(sortedFiles, category));
}
ArrayList<Map.Entry<String, Integer>> resultList =
new ArrayList(sortByValue(counts, true).entrySet());
return resultList.get(0);
}
private List<Double> getProperVector(List<List<Double>> lists, int option) {
List<Double> vec = null;
if (option == 1) // AVG
vec = lists.get(0);
else if (option == 2) // MAX
vec = lists.get(1);
else if (option == 3) // MIN
vec = lists.get(2);
return vec;
}
public Map.Entry<String, Double> classifyRocchio(File file, int option) {
List<List<Double>> vecs = getVectorsOfDoc(file);
List<Double> vec = getProperVector(vecs, option);
Map<File, Double> distances = new HashMap<>();
getCosineSimilarities(vec, option, distances);
distances = sortByValue(distances, true);
List<Map.Entry<File, Double>> sortedFiles =
new ArrayList<>(distances.entrySet());
return new AbstractMap.SimpleEntry<>
(trainingFiles.get(sortedFiles.get(0).getKey()),
sortedFiles.get(0).getValue());
}
}
Upvotes: 2
Views: 2922
Reputation: 61
Finally I got this working. The problem was the size of the objects that I was trying to serialize and deserialize (i.e wordVectors
is 480 MB).
To fix this, I used synchronized map. So
wordVectors = new HashMap<>();
is changed to
wordVectors = Collections.synchronizedMap(new HashMap<>());
I got the idea from here.
Upvotes: 3