Jordan Wallwork
Jordan Wallwork

Reputation: 3114

Constructing Regular Expressions to match numeric ranges

I'm looking for a way to construct regular expressions that match numeric inputs described by a given set of integer ranges, i.e. if I pass in the range specification 1,3-4 then a regex would be returned that matches just 1, 3 and 4.

I wrote the following method to try and do this:

// Builds an anchored pattern from a range description.
// "*" produces a pattern accepting any run of decimal digits; any other value
// is placed verbatim inside a character class, so the result accepts one or
// more characters drawn from that class (it has no notion of multi-digit
// numbers).
function generateRegex(values) {
    var charClass = (values == "*") ? "0-9" : values;

    return new RegExp("^[" + charClass + "]+$");
}

I'm having issues however as sometimes I need to match double digits, such as "8-16", and I also need to ensure that if I am passed a single digit value, such as "1", that the generated regex matches only 1, and not say 11.

I really would like this to remain a pretty small snippet of code, but am not sure enough about regexs to know how to do this. Would be massively grateful for any help!

EDIT: I realise I wasn't clear in my original paragraph, so I have edited it. I also realise that the regexes I originally generated do not work at all.

Upvotes: 1

Views: 1025

Answers (4)

xanatos
xanatos

Reputation: 111820

I was sure it was 4-8 hours :-) In the end (and in its uselessness) it was a good exercise in composing Regexes. You are free to try it. If we exclude one use of continue and the use of the Array constructor, it's fully jsLint ok.

/**
 * Builds an anchored regular expression that matches exactly the
 * non-negative integers described by a comma-separated list of numbers and
 * inclusive ranges, e.g. "1,11-88,90-101".
 *
 * A "*" item short-circuits to a pattern accepting any run of decimal
 * digits. Leading zeros in the specification are ignored ("007" means 7),
 * and zero itself is a valid number or range bound.
 *
 * @param {string} matches Comma-separated items of the form "N", "N-M" or "*".
 * @returns {RegExp|null} The compiled expression, or null when an item is
 *     malformed: empty, not made only of digits, containing more than one
 *     "-", or a range whose lower bound is greater than its upper bound.
 */
var BuildRegex = function(matches) {
    "use strict";

    var splits = matches.split(','),
        digitsOnly = /^[0-9]+$/,
        res = '^(',
        i, subSplit, min, max, temp;

    // Validates one bound and strips its leading zeros. A bound made only of
    // zeros collapses to "0" rather than to the empty string, so that zero
    // can be matched. Anything that is not a digit run is rejected, which
    // also keeps regex metacharacters out of the generated pattern.
    function normalize(text) {
        if (!digitsOnly.test(text)) {
            return null;
        }

        return BuildRegex.Trim(text, '0') || '0';
    }

    for (i = 0; i < splits.length; i += 1) {
        if (splits[i] === '*') {
            return new RegExp('^([0-9]+)$');
        }

        subSplit = splits[i].split('-');

        if (subSplit.length > 2) {
            return null;
        }

        min = normalize(subSplit[0]);

        if (min === null) {
            return null;
        }

        if (subSplit.length === 1) {
            res += min + '|';

            continue;
        }

        max = normalize(subSplit[1]);

        if (max === null) {
            return null;
        }

        // Reject reversed ranges. Both bounds are zero-stripped digit
        // strings, so a longer string is a larger number and equal-length
        // strings order numerically under plain string comparison.
        if (min.length > max.length || (min.length === max.length && min > max)) {
            return null;
        }

        // For 2-998 we first produce 2-9, then 10-99
        temp = BuildRegex.DifferentLength(res, min, max);

        if (temp === null || temp.min === null) {
            return null;
        }

        // Then here 100-998
        res = BuildRegex.SameLength(temp.res, temp.min, max);
    }

    // Every item appended a trailing '|'; drop the last one before closing.
    return new RegExp(res.slice(0, -1) + ')$');
};

// Returns `ch` concatenated `n` times ('' when n is 0).
BuildRegex.Repeat = function(ch, n) {
    "use strict";

    var slots = [];

    // n + 1 empty slots joined by `ch` yield exactly n copies of `ch`.
    slots.length = n + 1;

    return slots.join(ch);
};

// Removes every leading occurrence of the character `ch` from `str` and
// returns the remainder (possibly the empty string).
BuildRegex.Trim = function(str, ch) {
    "use strict";

    var start,
        total = str.length;

    for (start = 0; start < total; start += 1) {
        if (str.charAt(start) !== ch) {
            break;
        }
    }

    return str.slice(start);
};

// Tells whether every character of `str` from index `start` onwards equals
// `digit`. An empty tail (start at or past the end) counts as true.
BuildRegex.IsOnlyDigit = function(str, start, digit) {
    "use strict";

    var pos = start;

    while (pos < str.length) {
        if (str[pos] !== digit) {
            return false;
        }

        pos += 1;
    }

    return true;
};

// Returns the pattern fragment for one digit position: the digit itself when
// both bounds coincide, otherwise a character class such as "[3-7]".
BuildRegex.RangeDigit = function(min, max) {
    "use strict";

    return (min !== max) ? '[' + min + '-' + max + ']' : min;
};

/**
 * Emits the alternatives for every digit count shorter than that of `max`.
 *
 * For each length from min.length up to (but excluding) max.length, the
 * range from the current lower bound to the all-nines number of that length
 * is appended through BuildRegex.SameLength, and the lower bound then moves
 * to the smallest number with one more digit ("1" followed by zeros).
 * For 2-998 this appends 2-9 and 10-99 and leaves "100" as the new minimum.
 *
 * The former `tempMin > tempMax` guard was removed: the two values were
 * either a "10…0" / "9…9" pair (never greater as strings) or compared
 * against undefined, so it could not fire, and its bare `null` return did
 * not match the object shape the result is read with.
 *
 * @param {string} res Pattern text built so far.
 * @param {string} min Lower bound as a digit string.
 * @param {string} max Upper bound as a digit string.
 * @returns {{min: string, res: string}} The lower bound that has as many
 *     digits as `max` (or `min` itself when the lengths already agree) and
 *     the extended pattern text.
 */
BuildRegex.DifferentLength = function(res, min, max) {
    "use strict";

    var tempMin = min,
        i;

    for (i = min.length; i < max.length; i += 1) {
        res = BuildRegex.SameLength(res, tempMin, BuildRegex.Repeat('9', i));

        tempMin = '1' + BuildRegex.Repeat('0', i);
    }

    return {
        min: tempMin,
        res: res
    };
};

// Appends the alternatives covering min..max to `res`. When the bounds are
// identical (e.g. 100-100) the number is emitted literally; otherwise the
// shared leading digits are split off as a prefix and the differing
// remainders are handed to BuildRegex.RecursivelyAddRange.
BuildRegex.SameLength = function(res, min, max) {
    "use strict";

    var shared = 0;

    if (min === max) {
        return res + min + '|';
    }

    // Length of the common leading part of both bounds.
    while (shared < min.length && min[shared] === max[shared]) {
        shared += 1;
    }

    return BuildRegex.RecursivelyAddRange(
        res,
        min.slice(0, shared),
        min.slice(shared),
        max.slice(shared)
    );
};

// Appends to `res` the alternatives matching `prefix` followed by any digit
// string between `min` and `max`, and returns the extended text. Each
// alternative ends with '|'.
//
// The range is split into up to three parts: the numbers sharing min's first
// digit, the numbers whose first digit lies strictly between the two bounds'
// first digits (any tail), and the numbers sharing max's first digit.
BuildRegex.RecursivelyAddRange = function(res, prefix, min, max) {
    "use strict";

    var tailLength = min.length - 1,
        lowHead = min[0],
        highHead = max[0],
        minTailIsZeros, maxTailIsNines, midLow, midHigh;

    // One digit left: a single digit or character class finishes the job.
    if (min.length === 1) {
        return res + prefix + BuildRegex.RangeDigit(lowHead, highHead) + '|';
    }

    // Check whether min ends in all zeros and max ends in all nines; if both
    // hold, every tail is allowed and one alternative covers the whole range.
    minTailIsZeros = BuildRegex.IsOnlyDigit(min, 1, '0');
    maxTailIsNines = BuildRegex.IsOnlyDigit(max, 1, '9');

    if (minTailIsZeros && maxTailIsNines) {
        return res + prefix + BuildRegex.RangeDigit(lowHead, highHead) +
            BuildRegex.Repeat('[0-9]', tailLength) + '|';
    }

    // Lower part: min up to the last number starting with min's first digit.
    // The middle part then starts at the next first digit, if there is one.
    midLow = min;

    if (!minTailIsZeros) {
        res = BuildRegex.RecursivelyAddRange(res, prefix + lowHead, min.slice(1), BuildRegex.Repeat('9', tailLength));

        midLow = (lowHead === '9')
            ? null
            : String.fromCharCode(min.charCodeAt(0) + 1) + BuildRegex.Repeat('0', tailLength);
    }

    // The middle part ends just below max's first digit, if there is one.
    midHigh = max;

    if (!maxTailIsNines) {
        midHigh = (highHead === '0')
            ? null
            : String.fromCharCode(max.charCodeAt(0) - 1) + BuildRegex.Repeat('9', max.length - 1);
    }

    // Middle part: emitted only when a non-empty span of first digits remains.
    if (midLow !== null && midHigh !== null && midLow[0] <= midHigh[0]) {
        res = BuildRegex.RecursivelyAddRange(res, prefix + BuildRegex.RangeDigit(midLow[0], midHigh[0]), midLow.slice(1), midHigh.slice(1));
    }

    // Upper part: the first number starting with max's first digit up to max.
    if (!maxTailIsNines) {
        res = BuildRegex.RecursivelyAddRange(res, prefix + highHead, BuildRegex.Repeat('0', max.length - 1), max.slice(1));
    }

    return res;
};

// ----------------------------------------------------------

// Writes one line to the page showing the specification `p` next to the
// result BuildRegex produces for it.
var printRegex = function(p) {
    "use strict";

    var line = p + ': ' + BuildRegex(p) + '<br>';

    document.write(line);
};

// Sample specifications, printed in this order.
[
    '*',
    '1',
    '1,*',
    '1,2,3,4',
    '1,11-88',
    '1,11-88,90-101',
    '1-11111',
    '75-11119'
].forEach(function(spec) {
    "use strict";

    printRegex(spec);
});

Test here http://jsfiddle.net/dnqYV/

The C# version is here http://ideone.com/3aEt3E

Upvotes: 1

rodneyrehm
rodneyrehm

Reputation: 13557

I'm not sure there is a (sane) way to test integer ranges with RegExp. I believe you're fixated on RegExp, where there are much simpler (more flexible) approaches. Take a look at IntRangeTest().

// Builds a tester for the specification, then logs the specification followed
// by one boolean per probe; a boolean is true when the tester's answer for
// that input agrees with the expected answer listed next to it.
var range = new IntRangeTest('0,10-20');
console.log.apply(console, ["0,10-20"].concat([
    ["", false],
    ["-5", false],
    ["0", true],
    ["5", false],
    ["11", true],
    ["123.23", false]
].map(function(probe) {
    return range.test(probe[0]) == probe[1];
})));

If you feel like it, you can easily add this to Number.prototype. You could also quite easily make this an extension to RegExp, if that's what you're worried about.

Upvotes: 0

Jordan Wallwork
Jordan Wallwork

Reputation: 3114

Ok so it seems that there are 4 main cases that I need to address:

  • Single digits, ie 1, would simply generate the regex /^1$/
  • Multiple digits, ie 12, would require the regex /^12$/
  • Single digit ranges, ie 3-6, would generate the regex /^[3-6]$/
  • And finally, multiple digit ranges work in a similar way to multiple digits but with a range, i.e. 11-14 would become /^1[1-4]$/. These would need to be split into multiple alternatives if they span more than one leading digit, i.e. 23-31 would become /^(2[3-9]|3[0-1])$/ (the group is needed so that both anchors apply to every alternative)

Therefore, all I need to do is identify each of these cases and create a compound regex using | like xanatos suggested. Ie, to match all of the above criteria would generate a regex like:

/^( 1 | 12 | [3-6] | 1[1-4] | 2[3-9]|3[0-1] )$/

Do others agree this seems like a decent way to progress?

Upvotes: 0

Ludovic Kuty
Ludovic Kuty

Reputation: 4954

Regexes don't know anything about numbers, only digits. So [8-16] is invalid because you say match between 8 and 1 (instead of 1 and 8 e.g.) plus the digit 6. If you want to match numbers, you have to consider them lexically. For example, to match numbers between 1 and 30, you have to write something like (other regexes exist):

/^(30|[1-2]\d|[1-9])$/

Upvotes: 1

Related Questions