Reputation: 259
// For each numbered input file abc<a>.txt, prints the word count, total token
// count, term frequency, IDF and TF/IDF of every search term in array2.
// NOTE(review): `filename` is used as the loop bound, so it presumably holds the
// number of files rather than a name -- confirm against its declaration.
for (a = 0; a < filename; a++) {
    try {
        System.out
                .println(" _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ");
        System.out.println("\n");
        System.out.println("The word inputted : " + word2);
        File file = new File(
                "C:\\Users\\user\\fypworkspace\\TextRenderer\\abc" + a
                        + ".txt");
        System.out.println(" _________________");
        System.out.print("| File = abc" + a + ".txt | \t\t \n");
        for (int i = 0; i < array2.length; i++) {
            totalCount = 0;
            wordCount = 0;
            // The file is re-read from the start for every term. A missing file
            // makes this constructor throw, which the catch below reports.
            Scanner s = new Scanner(file);
            try {
                while (s.hasNext()) {
                    totalCount++;
                    if (s.next().equals(array2[i]))
                        wordCount++;
                }
            } finally {
                // Release the file handle; the original never closed the
                // scanner, leaking one handle per term per file.
                s.close();
            }
            System.out.print(array2[i] + " --> Word count = "
                    + "\t " + "|" + wordCount + "|");
            System.out.print(" Total count = " + "\t " + "|"
                    + totalCount + "|");
            // An empty file gives 0/0 here, which prints as NaN.
            System.out.printf(" Term Frequency = | %8.4f |",
                    (double) wordCount / totalCount);
            System.out.println("\t ");
            // IDF = log10(numDoc / numofDoc[i]); presumably numofDoc[i] is the
            // number of documents containing term i -- confirm with its producer.
            double inverseTF = Math.log10((float) numDoc
                    / (numofDoc[i]));
            System.out.println(" --> IDF = " + inverseTF );
            double TFIDF = (((double) wordCount / totalCount) * inverseTF);
            System.out.println(" --> TF/IDF = " + TFIDF + "\n");
        }
    } catch (FileNotFoundException e) {
        System.out.println("File is not found");
    }
}
}
}
This is the example output:
The word inputted : how are you
| File = abc0.txt |
how --> Word count = |4| Total count = |957| Term Frequency = | 0.0042 |
--> IDF = 0.5642714398516419
--> TF/IDF = 0.0023585013159943234
are --> Word count = |7| Total count = |957| Term Frequency = | 0.0073 |
--> IDF = 0.1962946357308887
--> TF/IDF = 0.00143580193324579
you --> Word count = |10| Total count = |957| Term Frequency = | 0.0104 |
--> IDF = 0.1962946357308887
--> TF/IDF = 0.002051145618922557
How do I sum up all three TF/IDF values for each text file?
Upvotes: 0
Views: 848
Reputation: 2923
Assuming you just want a running total that you can display, then before your for loop
add something like:
double runningTfIDF = 0;
Then, immediately after calculating the current TF/IDF, add the line
runningTfIDF += TFIDF;
Then, after your for loop,
you can add a line to print the runningTfIDF.
Edited to include a more complete answer:
// Per-term results for the current file, keyed by the search term. The first
// loop fills these maps; the second loop prints them once every term has been
// counted. Values are stored as Integer/Double because the calculations are
// plain double arithmetic, and new BigDecimal(double) would throw on the
// NaN/Infinity produced by an empty file or a zero document count.
// NOTE(review): totalCount and wordCountVal are assumed to be int counters
// declared by the surrounding code -- confirm.
HashMap<String, Integer> wordCount = new HashMap<String, Integer>();
HashMap<String, Double> frequency = new HashMap<String, Double>();
HashMap<String, Double> inverseTF = new HashMap<String, Double>();
HashMap<String, Double> runningTfIDF = new HashMap<String, Double>();
for (int i = 0; i < array2.length; i++) {
    totalCount = 0;
    wordCountVal = 0;
    // The file is re-read from the start for every term.
    Scanner s = new Scanner(file);
    try {
        while (s.hasNext()) {
            totalCount++;
            if (s.next().equals(array2[i]))
                wordCountVal++;
        }
    } finally {
        // Release the file handle before moving on to the next term.
        s.close();
    }
    wordCount.put(array2[i], wordCountVal);
    double frequencyVal = (double) wordCountVal / totalCount;
    frequency.put(array2[i], frequencyVal);
    double inverseTFVal = Math.log10((float) numDoc
            / (numofDoc[i]));
    inverseTF.put(array2[i], inverseTFVal);
    double TFIDF = frequencyVal * inverseTFVal;
    runningTfIDF.put(array2[i], TFIDF);
}
// Print after the counting loop has finished, so each term is reported exactly
// once. HashMap iteration order is unspecified, so terms may not appear in
// input order. totalCount still holds the count from the last pass; every pass
// reads the same file, so it is the same for all terms.
for (String word : wordCount.keySet()) {
    System.out.print(word + " --> word count "
            + "\t |" + wordCount.get(word) + "|");
    System.out.print(" Total count = " + "\t " + "|"
            + totalCount + "|");
    System.out.printf(" Term Frequency = | %8.4f |",
            frequency.get(word));
    System.out.println("\t ");
    System.out.println(" --> IDF = " + inverseTF.get(word));
    System.out.println(" --> TF/IDF = " + runningTfIDF.get(word) + "\n");
}
That isn't the cleanest possible implementation by far, but in short you need to store your information for each word and cycle through the words after you've created your totals if you want to display the total starting with the first possible result. Does that make sense?
Upvotes: 1