lardconcepts
lardconcepts

Reputation: 97

Regex for finding repeating patterns in a phone number? Or perhaps not?

I want to find numbers with a pattern, out of a list of several thousand, and I'd like to match things like:

In a JSON object like this:

["ok", {"series/020" : ["02034353637", "02034445673", "02034147369", "02034653185"]}]

So all of those would match; for example the first one has 34 35 36, the second has both 444 and 567, the last has both 147 and/or 369, etc.

What have I tried? Well, most of the sites in that little box nagging me on the right! Stackoverflow search. Some online books... but I was starting to wonder if there's such a thing as "regex dyslexia" (regexia?), or perhaps it's just thickness, when I found the 142 page book. I know when I'm beat.

With hindsight, it perhaps would have been far quicker and easier to just do an "array of array searches" manually building up the patterns with the help of Google Sheets autofill, but is it even a possible thing?

Finally, is there a "pay me $10 and I'll do your regex for you" site? If not, there should be! They'd make a mint! Thank you.

Upvotes: 4

Views: 570

Answers (2)

Thomas
Thomas

Reputation: 3593

repeating digits and repeating pattern are pretty simple in this case

(\d+?)\1+

assuming that this is the first (or only) capture group in the overall regex, since the backreference \1 always refers to group number 1

The sequences take a little more effort, especially if we are talking about sequences of dynamic length. I wrote a little JS function to build such a regex:

/**
 * Build a global RegExp that matches runs of consecutive digits, either
 * ascending ("2345") or descending ("5432"). Runs wrap around between
 * 9 and 0, so "8901" and "1098" count as sequences too.
 *
 * The result is a single capturing group holding 20 alternatives: ten
 * ascending runs followed by ten descending runs, one per starting digit.
 *
 * @param {number} minLength - Minimum number of digits in a run. Expected to
 *   be an integer; negative values are treated as 0.
 * @param {number} maxLength - Maximum number of digits in a run. Expected to
 *   be an integer; values below minLength are treated as minLength.
 * @returns {RegExp} The combined pattern, created with the "g" flag.
 */
function sequencedDigitsRegex(minLength, maxLength){
    var requiredCount = Math.max(0, minLength);
    //never emit an optional digit when the range is empty, otherwise
    //runs of maxLength + 1 digits would be matched
    var optionalCount = Math.max(0, maxLength - requiredCount);
    var totalCount = requiredCount + optionalCount;

    //one alternative: required digits followed by nested optional digits,
    //e.g. "23(?:4(?:56?)?)?" for a run of 2 to 5 digits starting at 2
    function buildRun(firstDigit, step){
        function digitAt(position){
            //normalise into 0..9, also for negative intermediate values
            return String((((firstDigit + step * position) % 10) + 10) % 10);
        }

        //build the optional tail from the last digit outwards
        var tail = "";
        for(var p = totalCount - 1; p >= requiredCount; --p)
            tail = (p === totalCount - 1)
                ? digitAt(p) + "?"
                : "(?:" + digitAt(p) + tail + ")?";

        var head = "";
        for(var q = 0; q < requiredCount; ++q)
            head += digitAt(q);

        return head + tail;
    }

    //the regex-parts: ascending runs in out[0..9], descending in out[10..19]
    var out = new Array(20);
    for(var i = 0; i < 10; ++i){
        out[i] = buildRun(-(i + totalCount), 1);
        out[i + 10] = buildRun(totalCount - 1 - i, -1);
    }

    //join the parts into one, long regex
    return new RegExp("(" + out.join("|") + ")", "g");
}

If you want to combine them, you could do something like this:

//any group of digits that is immediately repeated at least once
var repeatingPattern = /(\d+?)\1+/g;
//ascending/descending digit runs of 2 to 8 digits
var sequencePattern = sequencedDigitsRegex(2, 8);
//either of the two; the repeating pattern comes first so its \1 still refers to group 1
var patterns = new RegExp(
    [repeatingPattern.source, sequencePattern.source].join("|"),
    "g"
);

Upvotes: 1

CoderPi
CoderPi

Reputation: 13211

You can write it as one RegEx, but for readability I left it like that:

// Sample payload: a status string followed by an object mapping a series
// name to its list of phone numbers (plus a few short test strings).
var data = [
  "ok",
  {
    "series/020": [
      "02034353637",
      "02034445673",
      "02034147369",
      "02034653185",
      "345",
      "2345",
      "876",
      "2233",
      "3355",
      "77777",
      "147",
      "258",
      "369",
      "373737",
      "33773377"
    ]
  }
];

var numbers = data[1]["series/020"];

// Keep only the numbers that isPatternNumber accepts
var patternNumbers = numbers.filter(function (candidate) {
  return isPatternNumber(candidate);
});

// Demo output
document.write(["<pre>", JSON.stringify(patternNumbers, null, "\t"), "</pre>"].join(""));
document.write(["Matched ", patternNumbers.length, " out of ", numbers.length].join(""));

/**
 * Tell whether a number string contains at least one "memorable" pattern.
 *
 * @param {string} n - The digits to inspect (coerced to a string by RegExp.test).
 * @returns {boolean} true if any of the patterns below occurs anywhere in n.
 */
function isPatternNumber(n) {
  var patterns = [
    // constant patterns
    /147|258|369/,
    // the same digit 3+ times in a row
    /(\d)\1{2,}/,
    // ascending sequence of 3 digits
    /012|123|234|345|456|567|678|789/,
    // descending sequence of 3 digits (includes 210, mirroring 012 above)
    /987|876|765|654|543|432|321|210/,
    // doubled pair xxyy (x = y is possible, same as 4 in a row)
    /(\d)\1(\d)\2/,
    // alternating pattern xyxy (x = y is possible, same as 4 in a row)
    /(\d)(\d)\1\2/
  ]

  return patterns.some(function (pattern) {
    return pattern.test(n)
  })
}

// Demo Input
// First <input> and first <span> on the page: the text box and its result label
var input = document.getElementsByTagName("input")[0];
var output = document.getElementsByTagName("span")[0];

// Re-evaluate on every input event and show the boolean verdict as text
input.oninput = function showVerdict() {
  var verdict = isPatternNumber(input.value);
  output.textContent = verdict;
};
Test a number:
<input type="text" /> <span></span>

Upvotes: 5

Related Questions