Reputation: 186
I'm trying to find a repeating pattern in a string of binary digits.
eg. 0010010010 or 1110111011 = ok
not. 0100101101 = bad
The strings are 10 digits long (as above) & i guess 2 iterations of the 'pattern' are the minimum.
I started manually setting a 'bank' of patterns that the program could match it with but there must be a better way using an algorithm?
Searching got me nowhere - i think the language & terminology i'm using is incorrect..
Upvotes: 6
Views: 5901
Reputation: 23945
As far as I can tell, there are 62 patterned binary strings of length 10 => 2^1 + 2^2 + 2^3 + 2^4 + 2^5
. Here's some code that lists them and matches a patterned string:
function binComb (n){
var answer = []
for (var i=0; i<Math.pow(2,n);i++){
var str = i.toString(2)
for (var j=str.length; j<n; j++){
str = "0" + str
}
answer.push(str)
}
return answer
}
function allCycles(){
var answer = {}, cycled = ""
for (var i=1; i<=5; i++){
var arr = binComb(i)
for (var j=0; j<arr.length; j++){
while(cycled.length < 10){
cycled += arr[j]
if (10 - cycled.length < arr[j].length)
cycled += arr[j].substr(0,10 - cycled.length)
}
if (answer[cycled])
answer[cycled].push(arr[j])
else answer[cycled] = [arr[j]]
cycled = ""
}
}
return answer
}
function getPattern (str){
var patterns = allCycles()
if (patterns[str])
return patterns[str]
else return "Pattern not found."
}
OUTPUT:
console.log(getPattern("0010010010"))
console.log(getPattern("1110111011"))
console.log(getPattern("0100101101"))
console.log(getPattern("1111111111"))
["001"]
["1110"]
Pattern not found.
["1", "11", "111", "1111", "11111"]
Upvotes: 1
Reputation: 4772
This answer uses a Python regular expression to be compiled with the VERBOSE flag set which allows multi-line regexps with comments and non-significant spaces. I also use named groups in the regexp.
The regexp was developed with the aid of the Kodos tool.
The regexp searches for the longest repeating strings of length five then four then three repeating characters. (two repeating characters is redundant as it is equal to the longer four; and similarly one repeating is made redundant by five).
import re
rawstr = r"""
(?P<r5> .{5}) (?P=r5) |
(?P<r4>(?P<_42> .{2}) .{2}) (?P=r4) (?P=_42) |
(?P<r3>(?P<_31> .{1}) .{2}) (?P=r3){2} (?P=_31)
"""
matchstr = """\
1001110011
1110111011
0010010010
1010101010
1111111111
0100101101
"""
for matchobj in re.finditer(rawstr, matchstr, re.VERBOSE):
grp, rep = [(g, r) for g, r in matchobj.groupdict().items()
if g[0] != '_' and r is not None][0]
print('%r is a repetition of %r' % (matchobj.group().strip(), rep))
Gives the output:
'1001110011' is a repetition of '10011'
'1110111011' is a repetition of '1110'
'0010010010' is a repetition of '001'
'1010101010' is a repetition of '1010'
'1111111111' is a repetition of '11111'
Upvotes: 0
Reputation: 310
givenString: 0010010010
:
create list of possible patterns for givenString 0010010010
:
possiblePatterns = [00, 010, 0010, 00100, 01, 001, 0100, 10, 100]
repeat them to make strings with length >= 10
testPattern0 = 0000000000 // 00 00 00 00 00
testPattern1 = 010010010010 // 010 010 010 010
testPattern2 = 001000100010 // 0010 0010 0010
...
and check...
for each testPattern:
if '0010010010' is a substring of testPattern ==> pattern found
one of the matching strings:
testPattern2: 010010010010
givenString : 0010010010
found patterns:
foundPatterns = [010, 001, 100]
As one can see, this is a possibly redundant list as all patterns are basically the same, just shifted. But depending on the use case, this may actually be what you want.
function findPatterns(str){
var len = str.length;
var possible_patterns = {}; // save as keys to prevent duplicates
var testPattern;
var foundPatterns = [];
// 1) create collection of possible patterns where:
// 1 < possiblePattern.length <= str.length/2
for(var i = 0; i <= len/2; i++){
for(var j = i+2; j <= len/2; j++){
possible_patterns[str.substring(i, j)] = 0;
}
}
// 2) create testPattern to test against given str where:
// testPattern.length >= str.length
for(var pattern in possible_patterns){
testPattern = new Array(Math.ceil(len/pattern.length)+1).join(pattern);
if(testPattern.indexOf(str) >= 0){
foundPatterns.push(pattern);
}
}
return foundPatterns;
}
==> fiddle
Upvotes: 1
Reputation: 32912
Quite a challenge. What about this function?
function findPattern(n) {
var maxlen = parseInt(n.length/2);
NEXT:
for(var i=1; i<=maxlen; ++i) {
var len=0, k=0, prev="", sub;
do {
sub = n.substring(k,k+i);
k+= i;
len = sub.length;
if(len!=i) break;
if(prev.length && sub.length==i && prev!=sub) continue NEXT;
if(!prev.length) prev = sub;
} while(sub.length);
var trail = n.substr(n.length-len);
if(!len || len && trail==n.substr(0,len)) return n.substr(0,i);
}
return false;
}
It even works for any length strings with any contents. See the fiddle
Inspired by Jack's and Zim-Zam's answer, here is the list for brute force algorithm:
var oksubs =
["001","010","011","100","101","110",
"0001","0010","0011","0100","0101","0110","0111",
"1000","1001","1010","1011","1100","1101","1110",
"00000","00001","00011","00101","00110","00111","01000",
"01001","01010","01011","01100","01101","01110","01111",
"10000","10001","10011","10101","10110","10111","11000","11001",
"11010","11011","11100","11101","11110","11111"];
Thanks to Jack's comment, here is both short and effective code:
function findPattern(n) {
var oksubs = [n.substr(0,5),n.substr(0,4),n.substr(0,3)];
for(var i=0; i<oksubs.length; ++i) {
var sub = oksubs[i];
if((sub+sub+sub+sub).substr(0,10)==n) return sub;
}
return false;
}
Upvotes: 2
Reputation: 4772
In Python (again) but without regexps:
def is_repeated(text):
'check if the first part of the string is repeated throughout the string'
len_text = len(text)
for rep_len in range(len_text // 2, 0, -1):
reps = (len_text + rep_len) // rep_len
if (text[:rep_len] * reps).startswith(text):
return rep_len # equivalent to boolean True as will not be zero
return 0 # equivalent to boolean False
matchstr = """\
1001110011
1110111011
0010010010
1010101010
1111111111
0100101101
"""
for line in matchstr.split():
print('%r has a repetition length of %i' % (line, is_repeated(line)))
Output:
'1001110011' has a repetition length of 5
'1110111011' has a repetition length of 4
'0010010010' has a repetition length of 3
'1010101010' has a repetition length of 4
'1111111111' has a repetition length of 5
'0100101101' has a repetition length of 0
Upvotes: 0
Reputation: 761
To have a repeating pattern the pattern length can be only be <= 5
there can be a pattern of length 1.but pattern of length five will cover it. [STEP EDITED]
if there a pattern with length 2, there is always a pattern ith length 4.
from (1),(2), (3) and (4) , it is only necessary to check patterns of length 3,4 and 5
that means if first three digits match with next three digits proceed till end of string else break and go to 7
else match first four digits with next four if match proceed till end of string else break and go to 8
else match first five digits with next four if match proceed till end of string else break and go to 9
if one of 6, 7,8 is false, return failure
Upvotes: 1
Reputation: 18148
You've only got 2^10 patterns, that's a small enough number that you can just precompute all of the valid strings and store the results in a 1024-element boolean array; if a string is valid, then convert it to an integer (e.g. "0000001111" = 15) and store "true" in the resulting array index. To check if a string is valid, convert it to an integer and look up the index in the precomputed boolean array.
If your strings are longer than 10 digits then you'll need to be more clever about determining if a string is valid, but since you only have 1024 strings you might as well be lazy about this.
Upvotes: 1