Andy Nguyen
Andy Nguyen

Reputation: 45

Corner cases for Longest Common Substring problem?

I have been working on an exercise that gives two string s and t, find the longest common substring by outputting 3 integers: its starting position in s, its starting position in t (all 0-based), and its length. For example: cool toolbox would output 1 1 3. If there are multiple answers with same maximal length, output any of them.

Here is the original statement:

Input Format: Every line of the input contains two strings s and t consisting of lower case Latin letters.

Constraints: The total length of all sโ€™s as well as the total length of all tโ€™s does not exceed 10^5.

Output Format: For each pair of strings s and t, find its longest common substring and specify it by outputting three integers: its starting position in s, its starting position in t (both 0-based), and its length. More formally, output integers 0 โ‰ค ๐‘– < |๐‘ |, 0 โ‰ค ๐‘— < |๐‘ก|, and ๐‘™ โ‰ฅ 0 such that ๐‘ ๐‘–๐‘ ๐‘–+1 ยท ยท ยท ๐‘ ๐‘–+๐‘™โˆ’1 = ๐‘ก๐‘—๐‘ก๐‘—+1 ยท ยท ยท ๐‘ก๐‘—+๐‘™โˆ’1 and ๐‘™ is maximal. (If there are many such triples with maximal ๐‘™, output any of them.)

Time Limits: Java: 5 sec

Memory Limits: 512MB

I used the binary search to find the longest length possible and polynomial hash approach to find the substring itself. The code (including stress test) is below:

import java.util.*;
import java.io.*;
import java.math.BigInteger;

public class common_substring {
    private static int prime1 = 2000000033;
    private static int prime2 = 1000000007;
    private static int multiplier = 97;

    public class Answer {
        int i, j, len;
        Answer(int i, int j, int len) {
            this.i = i;
            this.j = j;
            this.len = len;
        }
    }

    public Answer solveNaive(String s, String t) {
        Answer ans = new Answer(0, 0, 0);
        for (int i = 0; i < s.length(); i++)
            for (int j = 0; j < t.length(); j++)
                for (int len = 0; i + len <= s.length() && j + len <= t.length(); len++)
                    if (len > ans.len && s.substring(i, i + len).equals(t.substring(j, j + len)))
                        ans = new Answer(i, j, len);
        return ans;
    }

    public Answer solve(String s, String t) {
        Answer ans = new Answer(0, 0, 0);

        int minLen = Math.min(s.length(), t.length());
        int shortest = 0, longest = minLen;
        
        // Use binary search to find the length of LONGEST common substring
        while (shortest <= longest) {
            int mid = (shortest + longest) / 2; 
            // In case shortest == 0 and longest == 1, cover the case l = 1
            if (shortest == 0 && longest == 1)
                mid = 1;
            Answer tmp = getSubString(mid, s, t);
            if (tmp.len != 0) {
                shortest = mid + 1;
                ans = tmp;
            }
            else
                longest = mid - 1;
        }

        return ans;
    }

    private Answer getSubString(int l, String s, String t) {
        Answer ans = new Answer(0, 0, 0);

        // Compute the (polynomial) hashes of all substrings of length l
        // First hash
        ArrayList<Long> subHashes1_S = new ArrayList<Long>(s.length() - l + 1);
        ArrayList<Long> subHashes1_T = new ArrayList<Long>(t.length() - l + 1);
        subHashes1_S.add(0, (long) hashFunc1(s.substring(0, l)));
        subHashes1_T.add(0, (long) hashFunc1(t.substring(0, l)));
        // Multiplier for the next substring
        long y1 = (new BigInteger(Integer.toString(multiplier))
            .modPow(new BigInteger(Integer.toString(l)), new BigInteger(Integer.toString(prime1))))
            .longValue(); // (multiplier ^ k) % prime1

        for (int i = 1; i <= s.length() - l; i++) {
            subHashes1_S.add(i, ((subHashes1_S.get(i - 1) * multiplier + s.charAt(i + l - 1) - s.charAt(i - 1) * y1) % prime1 + prime1) % prime1);
        }
        for (int i = 1; i <= t.length() - l; i++) {
            subHashes1_T.add(i, ((subHashes1_T.get(i - 1) * multiplier + t.charAt(i + l - 1) - t.charAt(i - 1) * y1) % prime1 + prime1) % prime1);
        }

        // Second hash
        ArrayList<Long> subHashes2_S = new ArrayList<Long>(s.length() - l + 1);
        ArrayList<Long> subHashes2_T = new ArrayList<Long>(t.length() - l + 1);
        subHashes2_S.add(0, (long) hashFunc2(s.substring(0, l)));
        subHashes2_T.add(0, (long) hashFunc2(t.substring(0, l)));
        // Multiplier for the next substring
        long y2 = (new BigInteger(Integer.toString(multiplier))
            .modPow(new BigInteger(Integer.toString(l)), new BigInteger(Integer.toString(prime2))))
            .longValue(); // (multiplier ^ k) % prime2

        for (int i = 1; i <= s.length() - l; i++) {
            subHashes2_S.add(i, ((subHashes2_S.get(i - 1) * multiplier + s.charAt(i + l - 1) - s.charAt(i - 1) * y2) % prime2 + prime2) % prime2);
        }
        for (int i = 1; i <= t.length() - l; i++) {
            subHashes2_T.add(i, ((subHashes2_T.get(i - 1) * multiplier + t.charAt(i + l - 1) - t.charAt(i - 1) * y2) % prime2 + prime2) % prime2);
        }

        // Compare hashes of two substrings
        // Keys represent the hashes, values represent the indices
        HashMap<Long, Integer> exclusive1 = new HashMap<Long, Integer>(), exclusive2 = new HashMap<Long, Integer>();
        for (int i = 0; i < subHashes1_S.size(); i++) {
            exclusive1.put(subHashes1_S.get(i), i);
            exclusive2.put(subHashes2_S.get(i), i);
        }

        for (int j = 0; j < subHashes1_T.size(); j++) {
            if (exclusive1.containsKey(subHashes1_T.get(j)) && exclusive2.containsKey(subHashes2_T.get(j)))
                ans = new Answer(exclusive1.get(subHashes1_T.get(j)), j, l);
        }
        
        return ans;
    }

    private static int hashFunc1(String s) {
        long hash = 0;
        for (int i = 0; i < s.length(); ++i)
            hash = (hash * multiplier + s.charAt(i)) % prime1;
        return (int)hash;
    }

    private static int hashFunc2(String s) {
        long hash = 0;
        for (int i = 0; i < s.length(); ++i)
            hash = (hash * multiplier + s.charAt(i)) % prime2;
        return (int)hash;
    }

    public void run() {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        PrintWriter out = new PrintWriter(System.out);
        in.lines().forEach(line -> {
            StringTokenizer tok = new StringTokenizer(line);
            String s = tok.nextToken();
            String t = tok.nextToken();
            Answer ans = solve(s, t);
            out.format("%d %d %d\n", ans.i, ans.j, ans.len);
        });

        // Stress test
        // boolean again = true;
        // while (again) {
        //     // choose a Character random from this String 
        //     String alphabet = "abcdefghijklmnopqrstuvwxyz";
        //     Random random = new Random();

        //     // create StringBuffer size of AlphaNumericString 
        //     StringBuilder sb1 = new StringBuilder(); 
        //     StringBuilder sb2 = new StringBuilder();

        //     for (int i = 0; i < 120; i++) { 

        //         // add Character one by one in end of sb 
        //         sb1.append(alphabet.charAt(random.nextInt(26)));
        //         sb2.append(alphabet.charAt(random.nextInt(26)));
        //     }

        //     String s = sb1.toString();
        //     String t = sb2.toString();
        //     // s = "yslhjqrnyecavqucgiucgxmn";
        //     // t = "svilryrdllesupkdhkwhbdzh";

        //     Answer ans = solve(s, t);
        //     Answer nai = solveNaive(s, t);
        //     System.out.println(s + " " + t);
        //     System.out.println(String.format("Answer: %d %d %d", ans.i, ans.j, ans.len));
        //     System.out.println(String.format("Naive : %d %d %d", nai.i, nai.j, nai.len));
            
        //     again = ans.len == nai.len;
        // }
        out.close();
    }

    static public void main(String[] args) {
        new common_substring().run();
    }
}

However, it seems like there are corner cases that I haven't taken into account yet, as the grader keeps rejecting my solution by saying "Wrong Answer" on a specific case. The problem is that I have no access to the input. Stress test runs fine. Any idea what could this corner case be?

Upvotes: 0

Views: 86

Answers (0)

Related Questions