Reputation: 97
I want to find numbers with a pattern, out of a list of several thousand, and I'd like to match things like:
In a JSON object like this:
["ok", {"series/020" : ["02034353637", "02034445673", "02034147369", "02034653185"]}]
So all of those would match; for example the first one has 34 35 36, the second has both 444 and 567, the last has both 147 and/or 369, etc.
What have I tried? Well, most of the sites in that little box nagging me on the right! Stackoverflow search. Some online books... but I was starting to wonder if there's such a thing as "regex dyslexia" (regexia?), or perhaps it's just thickness, when I found the 142 page book. I know when I'm beat.
With hindsight, it perhaps would have been far quicker and easier to just do an "array of array searches" manually building up the patterns with the help of Google Sheets autofill, but is it even a possible thing?
Finally, is there a "pay me $10 and I'll do your regex for you" site? If not, there should be! They'd make a mint! Thank you.
Upvotes: 4
Views: 570
Reputation: 3593
repeating digits and repeating pattern are pretty simple in this case
(\d+?)\1+
assuming that this is the first/only bracket in the total regex
The sequences take a little more effort, especially if we are talking about sequences of dynamic length. I wrote a little JS function to build such a regex:
/**
 * Builds a global RegExp that matches runs of consecutive digits, both
 * ascending (e.g. "2345") and descending (e.g. "9876").
 *
 * The digits are treated as a cycle, so runs that wrap between 9 and 0
 * (e.g. "8901" or "1098") are matched as well. The result consists of 20
 * alternatives (10 ascending, 10 descending) wrapped in one capturing group.
 *
 * The pattern text is assembled with plain string concatenation; no code is
 * generated or evaluated at runtime.
 *
 * @param {number} minLength - Number of digits every match must contain.
 * @param {number} maxLength - Largest run length to match. Values that are
 *   not greater than minLength produce runs of exactly minLength digits.
 * @returns {RegExp} A regular expression with the "g" flag set.
 */
function sequencedDigitsRegex(minLength, maxLength){
	// How many digits beyond the required ones may optionally extend a run.
	// Zero when maxLength does not exceed minLength, so that (3, 3) matches
	// exactly three digits instead of three or four.
	var optionalCount = maxLength > minLength ? maxLength - minLength : 0;

	// Two copies of the digit cycle so that any 10-wide window can be sliced.
	var digits = "98765432109876543210".split("");

	// Builds one alternative from a 10-digit cycle `a`. The run ends at a[0]
	// and is assembled right-to-left: first the nested optional tail, then
	// the required digits in front of it.
	function buildPart(a){
		var index = 0;
		var part = "";
		for(; index < optionalCount; ++index){
			part = index === 0
				? a[0] + "?"
				: "(?:" + a[index % 10] + part + ")?";
		}
		for(var required = 0; required < minLength; ++required){
			part = a[index++ % 10] + part;
		}
		return part;
	}

	var forward = [];
	var backward = [];
	for(var i = 0; i < 10; ++i){
		var a = digits.slice(i, i + 10);
		forward.push(buildPart(a));
		// also include the reversed variant
		backward.push(buildPart(a.slice().reverse()));
	}

	// join the parts into one long regex
	return new RegExp("(" + forward.concat(backward).join("|") + ")", "g");
}
If you want to combine them, you could do something like this:
// Matches a digit group that is immediately repeated one or more times.
var repeatingPattern = /(\d+?)\1+/g;
// Matches ascending/descending digit runs of 2 to 8 digits.
var sequencePattern = sequencedDigitsRegex(2,8);
// Union of both patterns. repeatingPattern goes first so that its
// backreference \1 keeps pointing at its own capturing group.
var patterns = new RegExp(
	[repeatingPattern.source, sequencePattern.source].join("|"),
	"g"
);
Upvotes: 1
Reputation: 13211
You can write it as one RegEx, but for readability I left it like that:
// Sample payload: the string "ok" followed by an object that maps the key
// "series/020" to an array of number strings used as test input.
var data = ["ok", {
"series/020": [
"02034353637", "02034445673", "02034147369", "02034653185",
"345", "2345", "876",
"2233", "3355", "77777",
"147", "258", "369",
"373737", "33773377",
]
}]
// The number strings to classify.
var numbers = data[1]["series/020"]
// Keep only the entries for which isPatternNumber returns true.
var patternNumbers = numbers.filter(isPatternNumber)
// Demo output
// Writes the matched numbers as tab-indented JSON inside a <pre> element,
// followed by a "Matched X out of Y" summary line.
document.write("<pre>" + JSON.stringify(patternNumbers, null, "\t") + "</pre>")
document.write("Matched " + patternNumbers.length + " out of " + numbers.length)
/**
 * Tells whether a number contains at least one of the recognised digit
 * patterns anywhere inside it.
 *
 * Recognised patterns:
 *  - one of the fixed triples 147, 258, 369
 *  - the same digit three or more times in a row (e.g. 777)
 *  - three consecutive ascending digits (012 ... 789)
 *  - three consecutive descending digits (987 ... 210)
 *  - two doubled digits in a row, xxyy (e.g. 2233)
 *  - an alternating pair, xyxy (e.g. 3737)
 *
 * @param {string} n - The number to inspect; RegExp#test converts
 *   non-string values to a string.
 * @returns {boolean} true if any pattern is found, false otherwise.
 */
function isPatternNumber(n) {
	// constant pattern
	if (/147|258|369/.test(n)) return true
	// repeating digit, 3+ in a row
	if (/(\d)\1{2,}/.test(n)) return true
	// sequence asc 3+
	if (/012|123|234|345|456|567|678|789/.test(n)) return true
	// sequence desc 3+ (210 included so the list mirrors the ascending one)
	if (/987|876|765|654|543|432|321|210/.test(n)) return true
	// repeating double sequence xxyy (note that x=y is possible, same as 4 in a row)
	if (/(\d)\1(\d)\2/.test(n)) return true
	// alternating pattern xyxy (note that x=y is possible, => same as 4 in a row)
	if (/(\d)(\d)\1\2/.test(n)) return true
	return false
}
// Demo Input
// Grab the first <input> and the first <span> element in the document.
var input = document.getElementsByTagName("input")[0]
var output = document.getElementsByTagName("span")[0]
// On every input event, show the result of isPatternNumber for the
// field's current value in the span.
input.oninput = function() {
output.textContent = isPatternNumber(input.value)
}
Test a number:
<input type="text" /> <span></span>
Upvotes: 5