abhinav
abhinav

Reputation: 1282

[Interview]Similar Words Distance Calculation

This Question was asked in an interview

Assume you have a dictionary of words: (use if you have /usr/share/dict/words).

Given a word(eg: cricket), give me all the words from the dictionary that could be reached by doing n operations. Where an operation is one of :
Addition
Replace
Deletion

For example lets find all the words that can be formed from "cricket" if only 1 operation is allowed.

{'word': 'clicket', 'op': ['replace']} {'word': 'crickey', 'op': ['replace']} {'word': 'crickety', 'op': ['addition']} etc

I am printing in my own format, but you get the gist.

Here is what I have attempted

  1. based on number of operations compute a list of all possible sequence.
  2. then iterate over the list and apply them one by one and store words which are present in dictionary.

This is brute force solution. I am wondering if there is an efficient solution for this. Below is the code for brute force solution

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;


public class SimilarWordDistance {

    Map<String,Boolean> dictionary = new HashMap<String,Boolean>();
    int ADDTION = -1;
    int REPLACE = 0;
    int DELETION = 1;

    /**
     * @param args
     * @throws IOException 
     */
    public static void main(String[] args) throws IOException {

        SimilarWordDistance swd = new SimilarWordDistance();
        swd.readDictionary();
        //swd.findSimilar("cricket", 1);
        swd.findSimilar("happiness", 3);
    }

    public void findSimilar(String word,int num) {
        int possibleOperations = (int) Math.pow(3 , num);
        Integer[][] operations = new Integer[possibleOperations][num];
        buildOperationsArray(num, possibleOperations, operations);
        List<String> l = new ArrayList<String>();
        l.add(word);
        Map<String,Integer[]> sols = new HashMap<String,Integer[]>();

        for(int i=0;i<operations.length;i++)
            applyOperation(operations[i],l,sols);

        Iterator<String> itr = sols.keySet().iterator();
        while(itr.hasNext()) {
            String n = itr.next();
            printSolution(sols.get(n), n);
        }
    }


    private void applyOperation(Integer[] operation,List<String> word,Map<String,Integer[]> sols) {
        List<String> possiblities = word;
         for(int i=0;i<operation.length;i++) {
            if(operation[i] == ADDTION) {
                List<String> temp = new ArrayList<String>();
                for(int j =0;j<possiblities.size();j++) {
                   temp.addAll(applyAdditionOperation(possiblities.get(j)));
                   //System.out.println(temp.size());
                }
                possiblities = temp;
            } 
            if(operation[i] == REPLACE) {
                List<String> temp = new ArrayList<String>();
                for(int j =0;j<possiblities.size();j++) {
                    temp.addAll(applyReplace(possiblities.get(j)));
                    //System.out.println(temp.size());
                 }
                possiblities = temp;
            }
            if(operation[i] == DELETION) {
                List<String> temp = new ArrayList<String>();
                for(int j =0;j<possiblities.size();j++) {
                    temp.addAll(applyDeletion(possiblities.get(j)));
                 }
                possiblities = temp;
            }
        }

        for(int i=0;i<possiblities.size() ;i++) {
            String w = possiblities.get(i);
            if(dictionary.containsKey(w)) {
                sols.put(w, operation);
            }
        }

    }

    protected void printSolution(Integer[] operation, String w) {
        System.out.print(w+"\t" );
        for(int j=0;j<operation.length;j++) {
            System.out.print(printOperation(operation[j])+"\t");
        }
        System.out.println();
    }

    private String printOperation(Integer integer) {
        if(integer == ADDTION) {
            return "Addition";
        } if(integer == REPLACE) {
            return "Replace";
        } else {
            return "Deletion";
        }
    }

    private List<String> applyAdditionOperation(String word) {
        char[] possiblities = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','y','z'};
        List<String> possibleWords = new ArrayList<String>();
        for(int i=0;i<possiblities.length;i++) {
            for(int j=0;j<word.length();j++) {
                String temp = insertAt(word,j,possiblities[i]);
                possibleWords.add(temp);
            }
        }
        return possibleWords;
    }

    private List<String> applyDeletion(String word) {
        List<String> possibleWord = new ArrayList<String>();
        for(int i=0;i<word.length();i++) {
            String prefix = word.substring(0,i);
            String suffix = word.substring(i+1,word.length());
            String tenp = prefix+suffix;
            possibleWord.add(tenp);
        }
        return possibleWord;
    }

    private List<String> applyReplace(String word) {
        char[] possiblities = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','y','z'};
        List<String> possibleWord = new ArrayList<String>();
        for(int i=0;i<possiblities.length;i++) {
            for(int j=0;j<word.length();j++) {
                String temp = word.substring(0,j)+possiblities[i]+word.substring(j+1,word.length());
                if(temp.length()!=word.length()) 
                    System.out.println("#####################");
                possibleWord.add(temp);
            }
        }
        return possibleWord;
    }

    private String insertAt(String word, int j, char c) {
        String prefix = word.substring(0,j);
        String suffix = word.substring(j+1,word.length());
        String ret = prefix+c+suffix;
        return ret;
    }

    protected void buildOperationsArray(int num, int possibleOperations,
            Integer[][] operations) {
        for(int i=0;i<possibleOperations;i=i+9){
            for(int j=0;j<num;j++) {
                fillPossiblities(num, operations, ADDTION, i, j); // 3 rows
                if(i+3<possibleOperations)
                    fillPossiblities(num, operations, REPLACE, i+3, j); // 3 rows
                if(i+6 < possibleOperations)
                fillPossiblities(num, operations, DELETION, i+6, j);  // 3 rows
            }
        }
       /* System.out.println(operations.length);
        for(int i=0;i<operations.length;i++) {
            for(int j=0;j<operations[0].length;j++) {
                System.out.print(operations[i][j]+"\t");
            }
            System.out.println();
        }*/
    }


    /**
     * Every time this method is called it will fill all the colums of the passed row
     * with 1 default value and the fill the next 2 rows with possible permutation of that
     * column
     * @param num
     * @param operations
     * @param def
     * @param curRow
     */
    protected void fillPossiblities(int num, Integer[][] operations,int def,int curRow,int curColumn) {
        for(int i=0;i<num;i++) {
            operations[curRow][i] = def;
        }
        for(int i=0;i<num;i++) {
            if(i!=curColumn)
                operations[curRow+1][i] = def;
        }
        operations[curRow+1][curColumn] = getNext(def); //
        int def1 = getNext(def);
        for(int i=0;i<num;i++) {
            if(i!=curColumn)
                operations[curRow+2][i] = def;
        }
        operations[curRow+2][curColumn] = getNext(def1);
    }

    private int getNext(int def) {
        if(def == -1) {
            return REPLACE;
        }
        if(def == 0) {
            return DELETION;
        } else {
            return ADDTION;
        }
    }

    public void readDictionary() throws IOException {

        BufferedReader in = new BufferedReader(new FileReader("C:\\Documents\\Downloads\\words"));

        while (in.ready()) {
          String s = in.readLine();
          dictionary.put(s, true);
        }
        in.close();
    }

}

Upvotes: 0

Views: 395

Answers (2)

doptimusprime
doptimusprime

Reputation: 9411

One solution is that you can modify your dictionary data structure and represent it in the form of graph.

Each node of the graph will represent a word. There will be an edge from one node to another if the one word is different from another by a single letter.

In your case, there could be a node between 'cricket' and 'crickets'.

Once the dictionary is loaded into this form, after that to query the words made by such operations would be node directly connected to cricket.

Upvotes: 0

chill
chill

Reputation: 16898

For each word in the-dictionary
   d = minimum-edit-distance (given-word, word)
   if d <= n
      print (word)

The minimum edit distance can be solved by aa well-known dynamic programming algorithm with complexity O(n*m) where n and m are length of the two words.

The wikipedia article has implementations: http://en.wikipedia.org/wiki/Levenshtein_distance

Upvotes: 1

Related Questions