anduplats
anduplats

Reputation: 1003

Implementing Soundex in Java

Please help me implement string similarity comparison in Java using the org.apache.commons.codec.language.Soundex class.

// Encode both inputs with a single encoder instance, then compare the codes.
Soundex encoder = new Soundex();
String firstCode = encoder.encode("YourString");
String secondCode = encoder.encode("YourStrink");

boolean sameCode = firstCode.equals(secondCode);
if (sameCode) {
    // Reached only when both inputs were encoded to the same string.
}

It doesn't seem to work: the encode function gives different results for similar strings. How can I compare two similar strings with this library?

Looking forward to hearing from you soon! ;)

Upvotes: 3

Views: 3865

Answers (2)

user16885563
user16885563

Reputation: 1

/**
 * American Soundex encoder.
 *
 * <p>A code consists of the first character of the word (upper-cased) followed
 * by three digits describing the consonants that follow it. Words that sound
 * alike are meant to receive the same code, so two words can be compared by
 * comparing their codes with {@code equals}.
 */
public class Soundex {

    /** Length of every non-empty code: one letter plus three digits. */
    private static final int CODE_LENGTH = 4;

    /**
     * Computes the four-character Soundex code of {@code word}.
     *
     * <p>Rules applied, in order:
     * <ul>
     *   <li>the first character is kept and upper-cased;</li>
     *   <li>every following letter is replaced by its {@link #soundexCode(char)};</li>
     *   <li>letters with code 0 (vowels, {@code y}, unknown letters) emit nothing
     *       but separate consonants, so an equal code on both sides is emitted twice;</li>
     *   <li>{@code h}, {@code w} and non-letter characters are transparent: equal
     *       codes on both sides of them collapse into one digit;</li>
     *   <li>adjacent letters with the same code (including the first letter and
     *       its successor) collapse into one digit;</li>
     *   <li>the result is truncated or zero-padded to four characters.</li>
     * </ul>
     *
     * @param word the word to encode; must not be {@code null}
     * @return the Soundex code, or the empty string when {@code word} is empty
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public static String soundexOut(String word) {
        if (word == null) {
            throw new NullPointerException("word must not be null");
        }
        if (word.isEmpty()) {
            return "";
        }

        StringBuilder result = new StringBuilder(CODE_LENGTH);
        char first = word.charAt(0);
        result.append(Character.toUpperCase(first));

        // The first letter's own code takes part in duplicate suppression
        // (e.g. "Pfister" -> P236, not P123).
        int previousCode = soundexCode(first);

        for (int i = 1; i < word.length() && result.length() < CODE_LENGTH; i++) {
            char c = Character.toLowerCase(word.charAt(i));
            if (c == 'h' || c == 'w' || !Character.isLetter(c)) {
                // Transparent: does not reset previousCode.
                continue;
            }
            int code = soundexCode(c);
            if (code != 0 && code != previousCode) {
                result.append(code);
            }
            // A vowel stores 0 here, which lets the same consonant code be
            // emitted again afterwards (e.g. "Tymczak" -> T522).
            previousCode = code;
        }

        while (result.length() < CODE_LENGTH) {
            result.append('0');
        }
        return result.toString();
    }

    /**
     * Returns the Soundex digit for a single character.
     *
     * @param c the character to classify; letter case is ignored
     * @return 1 for b/f/p/v, 2 for c/g/j/k/q/s/x/z, 3 for d/t, 4 for l,
     *         5 for m/n, 6 for r, and 0 for every other character
     */
    public static int soundexCode(char c) {
        switch (Character.toLowerCase(c)) {
            case 'b': case 'f': case 'p': case 'v':
                return 1;
            case 'c': case 'g': case 'j': case 'k':
            case 'q': case 's': case 'x': case 'z':
                return 2;
            case 'd': case 't':
                return 3;
            case 'l':
                return 4;
            case 'm': case 'n':
                return 5;
            case 'r':
                return 6;
            default:
                return 0;
        }
    }

    /**
     * Returns {@code word} with the vowels a, e, i, o, u removed from every
     * position except the first.
     *
     * <p>The first character is copied unchanged; the remaining characters are
     * lower-cased. This helper is kept for existing callers and is not used by
     * {@link #soundexOut(String)}, which needs the vowels as separators.
     *
     * @param word the word to strip; must not be {@code null}
     * @return the stripped word, or the empty string when {@code word} is empty
     */
    public static String dropedWord(String word) {
        if (word.isEmpty()) {
            return "";
        }
        StringBuilder kept = new StringBuilder(word.length());
        kept.append(word.charAt(0));
        String lower = word.toLowerCase();
        for (int i = 1; i < lower.length(); i++) {
            char c = lower.charAt(i);
            if ("aeiou".indexOf(c) < 0) {
                kept.append(c);
            }
        }
        return kept.toString();
    }
}

Upvotes: 0

user16885563
user16885563

Reputation: 1

/**
 * American Soundex encoder.
 *
 * <p>Produces a four-character code (one upper-case letter followed by three
 * digits) so that similar-sounding words can be compared by comparing codes.
 */
class Soundex {

    /** Consonant groups; the group at index {@code i} has Soundex digit {@code i + 1}. */
    private static final String[] CODE_GROUPS = {
        "BFPV", "CGJKQSXZ", "DT", "L", "MN", "R"
    };

    /** Length of every non-empty code: one letter plus three digits. */
    private static final int CODE_LENGTH = 4;

    /**
     * Returns the Soundex digit (1-6) of an upper-case consonant, or 0 when the
     * character belongs to no consonant group.
     */
    private static int getConsonantCode(char ch) {
        for (int i = 0; i < CODE_GROUPS.length; i++) {
            if (CODE_GROUPS[i].indexOf(ch) >= 0) {
                return i + 1;
            }
        }
        return 0;
    }

    /**
     * Computes the four-character Soundex code of {@code str}.
     *
     * <p>The first character is kept (upper-cased). Each later letter is mapped
     * to its consonant digit; letters with digit 0 (vowels and {@code Y}) emit
     * nothing but separate consonants, while {@code H}, {@code W} and non-letter
     * characters are transparent, so equal digits on both sides of them collapse
     * into one. Adjacent equal digits, including the first letter's own digit,
     * are emitted once. The result is zero-padded or truncated to four characters.
     *
     * @param str the word to encode; must not be {@code null}
     * @return the Soundex code, or the empty string when {@code str} is empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String getSoundexCode(String str) {
        if (str == null) {
            throw new NullPointerException("str must not be null");
        }
        if (str.isEmpty()) {
            return "";
        }

        // Upper-case per character: locale-independent, unlike String.toUpperCase().
        char first = Character.toUpperCase(str.charAt(0));
        StringBuilder soundexCode = new StringBuilder(CODE_LENGTH);
        soundexCode.append(first);

        // The first letter's digit also suppresses a following duplicate
        // (e.g. "Pfister" -> P236).
        int previousCode = getConsonantCode(first);

        for (int i = 1; i < str.length() && soundexCode.length() < CODE_LENGTH; i++) {
            char curr = Character.toUpperCase(str.charAt(i));
            if (curr == 'H' || curr == 'W' || !Character.isLetter(curr)) {
                // Transparent: keep previousCode so "Ashcraft" yields A261.
                continue;
            }
            int code = getConsonantCode(curr);
            if (code != 0 && code != previousCode) {
                soundexCode.append(code);
            }
            // Vowels store 0, allowing the same digit to repeat after them.
            previousCode = code;
        }

        while (soundexCode.length() < CODE_LENGTH) {
            soundexCode.append('0');
        }
        return soundexCode.toString();
    }
}

Upvotes: 0

Related Questions