Reputation: 1003
Please help me implement string similarity comparison in Java, using the org.apache.commons.codec.language.Soundex class from the Apache Commons Codec library:
// Create the encoder; per the question text this is org.apache.commons.codec.language.Soundex.
Soundex soundex = new Soundex();
// Encode both inputs; the two literals differ only in their final letter ('g' vs 'k').
String phoneticValue = soundex.encode("YourString");
String phoneticValue2 = soundex.encode("YourStrink");
// The strings are treated as similar when their encoded values are equal.
if(phoneticValue.equals(phoneticValue2)){
// Body left empty in the post: the action to take on a match would go here.
}
This doesn't seem to work: the encode function gives different results for similar strings. How can I compare two similar strings with this library?
Looking forward to hearing from you soon! ;)
Upvotes: 3
Views: 3865
Reputation: 1
/**
 * Hand-rolled American Soundex encoder.
 *
 * <p>A Soundex code is the first letter of a word followed by three digits that
 * classify the consonants after it, so that similar-sounding words (for example
 * "Robert" and "Rupert") map to the same code and can be compared with
 * {@link String#equals(Object)}.
 */
public class Soundex {

    /** Length of every non-empty code: one letter followed by three digits. */
    private static final int CODE_LENGTH = 4;

    /**
     * Computes the four-character Soundex code of {@code word}.
     *
     * <p>The encoding is case-insensitive and ignores every character that is
     * not an ASCII letter. Codes shorter than four characters are padded with
     * {@code '0'}; longer ones are truncated.
     *
     * @param word the word to encode; must not be {@code null}
     * @return the upper-case Soundex code, or the empty string when
     *         {@code word} contains no ASCII letter
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public static String soundexOut(String word) {
        StringBuilder soundex = new StringBuilder(CODE_LENGTH);
        int previousCode = 0;

        for (int i = 0; i < word.length() && soundex.length() < CODE_LENGTH; i++) {
            // Character.toLowerCase is locale-independent, unlike String.toLowerCase().
            char c = Character.toLowerCase(word.charAt(i));
            if (c < 'a' || c > 'z') {
                continue;
            }
            int code = soundexCode(c);

            if (soundex.length() == 0) {
                // The first letter is kept verbatim, but its code still takes part
                // in the duplicate check ("Pfister" -> P236, not P123).
                soundex.append(Character.toUpperCase(c));
                previousCode = code;
            } else if (c == 'h' || c == 'w') {
                // 'h' and 'w' are transparent: same-coded consonants on either
                // side of them are encoded only once ("Ashcraft" -> A261).
                continue;
            } else {
                if (code != 0 && code != previousCode) {
                    soundex.append(code);
                }
                // A vowel (code 0) resets the duplicate check, so same-coded
                // consonants separated by a vowel are both encoded.
                previousCode = code;
            }
        }

        if (soundex.length() == 0) {
            return "";
        }
        while (soundex.length() < CODE_LENGTH) {
            soundex.append('0');
        }
        return soundex.toString();
    }

    /**
     * Returns the Soundex digit of a single letter.
     *
     * @param c the character to classify, in either case
     * @return 1 to 6 for the coded consonant groups, or 0 for vowels,
     *         {@code h}, {@code w}, {@code y} and every non-letter
     */
    public static int soundexCode(char c) {
        switch (Character.toLowerCase(c)) {
            case 'b': case 'f': case 'p': case 'v':
                return 1;
            case 'c': case 'g': case 'j': case 'k':
            case 'q': case 's': case 'x': case 'z':
                return 2;
            case 'd': case 't':
                return 3;
            case 'l':
                return 4;
            case 'm': case 'n':
                return 5;
            case 'r':
                return 6;
            default:
                return 0;
        }
    }

    /**
     * Removes the vowels {@code a, e, i, o, u} from every position after the
     * first character. The first character is kept exactly as given; the
     * remaining characters are lower-cased.
     *
     * <p>{@link #soundexOut(String)} no longer needs this pre-processing step;
     * the method is kept for existing callers.
     *
     * @param word the word to strip; must not be {@code null}
     * @return {@code word} without its non-leading vowels, or the empty string
     *         when {@code word} is empty
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public static String dropedWord(String word) {
        if (word.isEmpty()) {
            return "";
        }
        StringBuilder drop = new StringBuilder(word.length());
        drop.append(word.charAt(0));
        String lower = word.toLowerCase();
        for (int i = 1; i < lower.length(); i++) {
            char c = lower.charAt(i);
            if ("aeiou".indexOf(c) < 0) {
                drop.append(c);
            }
        }
        return drop.toString();
    }
}
Upvotes: 0
Reputation: 1
/**
 * Minimal American Soundex encoder: maps a word to its first letter followed by
 * three digits classifying the consonants after it, so that similar-sounding
 * words share a code.
 */
class Soundex{

    /** Length of every non-empty code: one letter followed by three digits. */
    private static final int CODE_LENGTH = 4;

    /** Consonant groups; the group at index {@code i} is encoded as digit {@code i + 1}. */
    private static final String[] CODE_GROUPS = { "BFPV", "CGJKQSXZ", "DT", "L", "MN", "R" };

    /**
     * Returns the Soundex digit (1 to 6) of an upper-case consonant, or 0 for
     * any character that belongs to no consonant group.
     */
    private static int getConsonantCode( char ch ){
        for( int i = 0 ; i < CODE_GROUPS.length ; i++ ){
            if( CODE_GROUPS[i].indexOf(ch) >= 0 ) {
                return i + 1;
            }
        }
        return 0;
    }

    /**
     * Computes the four-character Soundex code of {@code str}.
     *
     * <p>The encoding is case-insensitive and ignores every character that is
     * not an ASCII letter. Codes shorter than four characters are padded with
     * {@code '0'}; longer ones are truncated.
     *
     * @param str the word to encode; must not be {@code null}
     * @return the upper-case Soundex code, or the empty string when
     *         {@code str} contains no ASCII letter
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String getSoundexCode( String str ){
        StringBuilder soundexCode = new StringBuilder( CODE_LENGTH );
        int previousCode = 0;

        for( int i = 0 ; i < str.length() && soundexCode.length() < CODE_LENGTH ; i++ ){
            // Character.toUpperCase is locale-independent, unlike String.toUpperCase().
            char curr = Character.toUpperCase( str.charAt(i) );
            if( curr < 'A' || curr > 'Z' ){
                continue;
            }
            int code = getConsonantCode( curr );

            if( soundexCode.length() == 0 ){
                // The first letter is kept verbatim, but its code still takes part
                // in the duplicate check ("Pfister" -> P236).
                soundexCode.append( curr );
                previousCode = code;
            }else if( curr == 'H' || curr == 'W' ){
                // H and W are transparent: same-coded consonants on either side
                // of them are encoded only once ("Ashcraft" -> A261).
                continue;
            }else{
                if( code != 0 && code != previousCode ){
                    soundexCode.append( code );
                }
                // A vowel (code 0) resets the duplicate check, so same-coded
                // consonants separated by a vowel are both encoded ("Tymczak" -> T522).
                previousCode = code;
            }
        }

        if( soundexCode.length() == 0 ){
            return "";
        }
        while( soundexCode.length() < CODE_LENGTH ){
            soundexCode.append( '0' );
        }
        return soundexCode.toString();
    }
}
Upvotes: 0