Reputation: 45
I have been working on an exercise that gives two string s
and t
, find the longest common substring by outputting 3 integers: its starting position in s
, its starting position in t
(all 0-based), and its length. For example: cool toolbox
would output 1 1 3
. If there are multiple answers with same maximal length, output any of them.
Here is the original statement:
Input Format: Every line of the input contains two strings s
and t
consisting of lower case Latin letters.
Constraints: The total length of all s
โs as well as the total length of all t
โs does not exceed 10^5.
Output Format: For each pair of strings s
and t
, find its longest common substring and specify it by
outputting three integers: its starting position in s
, its starting position in t
(both 0-based), and its
length. More formally, output integers 0 โค ๐ < |๐ |, 0 โค ๐ < |๐ก|, and ๐ โฅ 0 such that ๐ ๐๐ ๐+1 ยท ยท ยท ๐ ๐+๐โ1 =
๐ก๐๐ก๐+1 ยท ยท ยท ๐ก๐+๐โ1 and ๐ is maximal. (If there are many such triples with maximal ๐, output any of them.)
Time Limits: Java: 5 sec
Memory Limits: 512MB
I used the binary search to find the longest length possible and polynomial hash approach to find the substring itself. The code (including stress test) is below:
import java.util.*;
import java.io.*;
import java.math.BigInteger;
public class common_substring {
private static int prime1 = 2000000033;
private static int prime2 = 1000000007;
private static int multiplier = 97;
public class Answer {
int i, j, len;
Answer(int i, int j, int len) {
this.i = i;
this.j = j;
this.len = len;
}
}
public Answer solveNaive(String s, String t) {
Answer ans = new Answer(0, 0, 0);
for (int i = 0; i < s.length(); i++)
for (int j = 0; j < t.length(); j++)
for (int len = 0; i + len <= s.length() && j + len <= t.length(); len++)
if (len > ans.len && s.substring(i, i + len).equals(t.substring(j, j + len)))
ans = new Answer(i, j, len);
return ans;
}
public Answer solve(String s, String t) {
Answer ans = new Answer(0, 0, 0);
int minLen = Math.min(s.length(), t.length());
int shortest = 0, longest = minLen;
// Use binary search to find the length of LONGEST common substring
while (shortest <= longest) {
int mid = (shortest + longest) / 2;
// In case shortest == 0 and longest == 1, cover the case l = 1
if (shortest == 0 && longest == 1)
mid = 1;
Answer tmp = getSubString(mid, s, t);
if (tmp.len != 0) {
shortest = mid + 1;
ans = tmp;
}
else
longest = mid - 1;
}
return ans;
}
private Answer getSubString(int l, String s, String t) {
Answer ans = new Answer(0, 0, 0);
// Compute the (polynomial) hashes of all substrings of length l
// First hash
ArrayList<Long> subHashes1_S = new ArrayList<Long>(s.length() - l + 1);
ArrayList<Long> subHashes1_T = new ArrayList<Long>(t.length() - l + 1);
subHashes1_S.add(0, (long) hashFunc1(s.substring(0, l)));
subHashes1_T.add(0, (long) hashFunc1(t.substring(0, l)));
// Multiplier for the next substring
long y1 = (new BigInteger(Integer.toString(multiplier))
.modPow(new BigInteger(Integer.toString(l)), new BigInteger(Integer.toString(prime1))))
.longValue(); // (multiplier ^ k) % prime1
for (int i = 1; i <= s.length() - l; i++) {
subHashes1_S.add(i, ((subHashes1_S.get(i - 1) * multiplier + s.charAt(i + l - 1) - s.charAt(i - 1) * y1) % prime1 + prime1) % prime1);
}
for (int i = 1; i <= t.length() - l; i++) {
subHashes1_T.add(i, ((subHashes1_T.get(i - 1) * multiplier + t.charAt(i + l - 1) - t.charAt(i - 1) * y1) % prime1 + prime1) % prime1);
}
// Second hash
ArrayList<Long> subHashes2_S = new ArrayList<Long>(s.length() - l + 1);
ArrayList<Long> subHashes2_T = new ArrayList<Long>(t.length() - l + 1);
subHashes2_S.add(0, (long) hashFunc2(s.substring(0, l)));
subHashes2_T.add(0, (long) hashFunc2(t.substring(0, l)));
// Multiplier for the next substring
long y2 = (new BigInteger(Integer.toString(multiplier))
.modPow(new BigInteger(Integer.toString(l)), new BigInteger(Integer.toString(prime2))))
.longValue(); // (multiplier ^ k) % prime2
for (int i = 1; i <= s.length() - l; i++) {
subHashes2_S.add(i, ((subHashes2_S.get(i - 1) * multiplier + s.charAt(i + l - 1) - s.charAt(i - 1) * y2) % prime2 + prime2) % prime2);
}
for (int i = 1; i <= t.length() - l; i++) {
subHashes2_T.add(i, ((subHashes2_T.get(i - 1) * multiplier + t.charAt(i + l - 1) - t.charAt(i - 1) * y2) % prime2 + prime2) % prime2);
}
// Compare hashes of two substrings
// Keys represent the hashes, values represent the indices
HashMap<Long, Integer> exclusive1 = new HashMap<Long, Integer>(), exclusive2 = new HashMap<Long, Integer>();
for (int i = 0; i < subHashes1_S.size(); i++) {
exclusive1.put(subHashes1_S.get(i), i);
exclusive2.put(subHashes2_S.get(i), i);
}
for (int j = 0; j < subHashes1_T.size(); j++) {
if (exclusive1.containsKey(subHashes1_T.get(j)) && exclusive2.containsKey(subHashes2_T.get(j)))
ans = new Answer(exclusive1.get(subHashes1_T.get(j)), j, l);
}
return ans;
}
private static int hashFunc1(String s) {
long hash = 0;
for (int i = 0; i < s.length(); ++i)
hash = (hash * multiplier + s.charAt(i)) % prime1;
return (int)hash;
}
private static int hashFunc2(String s) {
long hash = 0;
for (int i = 0; i < s.length(); ++i)
hash = (hash * multiplier + s.charAt(i)) % prime2;
return (int)hash;
}
public void run() {
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
PrintWriter out = new PrintWriter(System.out);
in.lines().forEach(line -> {
StringTokenizer tok = new StringTokenizer(line);
String s = tok.nextToken();
String t = tok.nextToken();
Answer ans = solve(s, t);
out.format("%d %d %d\n", ans.i, ans.j, ans.len);
});
// Stress test
// boolean again = true;
// while (again) {
// // choose a Character random from this String
// String alphabet = "abcdefghijklmnopqrstuvwxyz";
// Random random = new Random();
// // create StringBuffer size of AlphaNumericString
// StringBuilder sb1 = new StringBuilder();
// StringBuilder sb2 = new StringBuilder();
// for (int i = 0; i < 120; i++) {
// // add Character one by one in end of sb
// sb1.append(alphabet.charAt(random.nextInt(26)));
// sb2.append(alphabet.charAt(random.nextInt(26)));
// }
// String s = sb1.toString();
// String t = sb2.toString();
// // s = "yslhjqrnyecavqucgiucgxmn";
// // t = "svilryrdllesupkdhkwhbdzh";
// Answer ans = solve(s, t);
// Answer nai = solveNaive(s, t);
// System.out.println(s + " " + t);
// System.out.println(String.format("Answer: %d %d %d", ans.i, ans.j, ans.len));
// System.out.println(String.format("Naive : %d %d %d", nai.i, nai.j, nai.len));
// again = ans.len == nai.len;
// }
out.close();
}
static public void main(String[] args) {
new common_substring().run();
}
}
However, it seems like there are corner cases that I haven't taken into account yet, as the grader keeps rejecting my solution by saying "Wrong Answer" on a specific case. The problem is that I have no access to the input. Stress test runs fine. Any idea what could this corner case be?
Upvotes: 0
Views: 86