Reputation: 588
I am writing a JavaScript function intended to find repeating phrases in a long string. The string is going to be in Unicode and non-English, which stops me from using RegExps. In this 99 bottles of beer example the results should be "bottles of beer on the wall", "take one down, pass it around" etc without numbers, only the recurring parts, but it gives wrong results:
window.findRepeats=function(){
inputString=document.getElementById("txt1").value;
outputArray=[];
for(originalStart=0;originalStart<inputString.length;originalStart++){
for(copyStart=originalStart+1;copyStart<inputString.length;copyStart++){
if(inputString[originalStart]==inputString[copyStart]){
for(windowByte=originalStart+1;windowByte<copyStart;windowByte++){
if(inputString[windowByte]!=inputString[copyStart-originalStart+windowByte]){
break;
}
if(windowByte>originalStart+1){
outputArray.push([originalStart,windowByte]);
}
}
}
}
}
for(arrayCounter=0;arrayCounter<outputArray.length;arrayCounter++){
outputArray[arrayCounter]+=":"+inputString.substring(outputArray[arrayCounter][0],outputArray[arrayCounter][1]);
}
document.getElementById("txt2").value=outputArray.join("\n");
}
<textarea rows=9 id="txt1">
99 bottles of beer on the wall, 99 bottles of beer.
Take one down and pass it around, 98 bottles of beer on the wall.
98 bottles of beer on the wall, 98 bottles of beer.
Take one down and pass it around, 97 bottles of beer on the wall.
97 bottles of beer on the wall, 97 bottles of beer.
Take one down and pass it around, 96 bottles of beer on the wall.
96 bottles of beer on the wall, 96 bottles of beer.
Take one down and pass it around, 95 bottles of beer on the wall.
95 bottles of beer on the wall, 95 bottles of beer.
Take one down and pass it around, 94 bottles of beer on the wall.
</textarea>
<br>
<textarea rows=9 id="txt2">
</textarea>
<br>
<input type="button" value="go" onclick='findRepeats()'>
</input>
If you check out the result, it gives all the repeating substrings but not the longest ones. I want only the longest substrings. As for suffix arrays and trees, I have failed to understand these so far, so I have to deal with this approach.
Upvotes: 1
Views: 118
Reputation: 4819
SOSO WORKING
How about this?
Apparently, your original function had an error, something isn't right in the result. I'll attempt to simplify the code now.
window.findRepeats=function(){
var inputString = document.getElementById("txt1").value;
var outputArray=[];
for(originalStart=0;originalStart<inputString.length;originalStart++){
for(copyStart=originalStart+1;copyStart<inputString.length;copyStart++){
if(inputString[originalStart]==inputString[copyStart]){
for(windowByte=originalStart+1;windowByte<copyStart;windowByte++){
if(inputString[windowByte]!=inputString[copyStart-originalStart+windowByte]){
break;
}
if(windowByte>originalStart+1){
outputArray.push([originalStart,windowByte+1]);
}
}
}
}
}
var shorts = [];
for(var i=0;i<outputArray.length;i++){
var curString = inputString.substring(outputArray[i][0],outputArray[i][1]);
if (curString[0] === " ")
curString = curString.substr(1);
if (curString.trim() != "")
shorts.push(curString);
}
var uniq = removeDuplicate(shorts);
for (var j=uniq.length-1;j>-1;j--){
for (var k=uniq.length-1;k>-1;k--){
if (uniq[j].indexOf(uniq[k]) > -1) {
uniq[k]=uniq[j];
}
}
}
uniq = removeDuplicate(uniq);
shorts = uniq;
document.getElementById("txt2").value=shorts.join("\n----------------\n");
}
function removeDuplicate(arr) {
return arr.slice().sort(function(a,b){return a > b}).reduce(function(a,b){if (a.slice(-1)[0] !== b) a.push(b);return a;},[]);
}
<textarea rows=9 style="width:100%" id="txt1">3 bottles of beer on the wall, 3 bottles of beer.
Take one down and pass it around, 2 bottles of beer on the wall.
2 bottles of beer on the wall, 2 bottles of beer.
Take one down and pass it around, 1 bottle of beer on the wall.
1 bottle of beer on the wall, 1 bottle of beer.
Take one down and pass it around, no more bottles of beer on the wall.
No more bottles of beer on the wall, no more bottles of beer.
Go to the store and buy some more, 99 bottles of beer on the wall.</textarea>
<br>
<textarea rows=9 style="width:100%" id="txt2"></textarea>
<br>
<input type="button" value="go" onclick='findRepeats()'>
Upvotes: 2