Reputation: 39
I'm attempting to use RegExp and JS to parse bible verses. Output doesn't matter, because some of my regexes are giving me false negatives (unless I just don't understand something about RegExp, which is probably the case).
Consider the following function:
/**
 * Splits a hard-coded verse-reference string into tokens, then logs which of
 * several patterns is the first to match each token.
 *
 * NOTE: every pattern here carries the global (g) flag and is reused with
 * test() across tokens. A global regex keeps its lastIndex between calls, so
 * a test() may begin partway into (or past the end of) the next token and
 * report false even though the token fits the pattern. That statefulness is
 * what this function demonstrates; remove the g flag if each test() should
 * start from index 0.
 *
 * @returns {void} Results are only written with console.log.
 */
function wtf() {
  const s = "1:1-8, 3:5, 4:-8-10, 25-36, 5:1-6:1-26, 32-40, 55, 7:8";
  const exp1 = /(\d+:)?([\d-]+(?=\d:)|[\d-]+)/g;
  const exp2 = /\d+[ :]+/g; // tests for \d:
  const exp3 = /\d+[-]\B/g; // tests for \d-
  const exp4 = /\b\d{1,3}[ -]+\d{1,3}\b/g; // tests for \d-\d
  const exp5 = /\d:.*\b/g; // tests for \d: followed by anything
  const exp6 = /^\d{1,3}$/g; // tests for 1, 12, 123, etc.

  // Because exp1 has capture groups, split() interleaves the captured text
  // (or undefined for a group that did not participate) with the separators.
  const result = s.split(exp1);

  // Drop separators, unmatched groups, and anything exp5 accepts.
  const separators = ["", ",", " ,", ", "];
  const output = result.filter(
    (token) =>
      token !== undefined && !separators.includes(token) && !exp5.test(token),
  );
  console.log(JSON.stringify(output));

  // Patterns are tried in this order; find() stops at the first one whose
  // test() succeeds, so later patterns are not touched for that token.
  const candidates = [exp2, exp4, exp3, exp6];
  for (const token of output) {
    const branch = candidates.find((pattern) => pattern.test(token));
    console.log(`Current Index: ${token}`);
    console.log(`IF Branch: ${branch ?? "else"}`);
    console.log("");
  }
}
The above code parses the string into an array, removes extraneous indices, renumbers the remaining indices, and then loops through each index, running a test to determine which block of code to run on each index.
For the most part, this works fine. The function mostly works on strings that follow the corresponding pattern. But there is a problem, as you can see below in the function's output:
["1:","1-8","3:","5","4:","-8-10","25-36","5:","1-","6:","1-26","32-40","55","7:","8"]
Current Index: 1:
IF Branch: /\d+[ :]+/g
Current Index: 1-8
IF Branch: /\b\d{1,3}[ -]+\d{1,3}\b/g
Current Index: 3:
IF Branch: /\d+[ :]+/g
Current Index: 5
IF Branch: /^\d{1,3}$/g
Current Index: 4:
IF Branch: /\d+[ :]+/g
Current Index: -8-10
IF Branch: /\b\d{1,3}[ -]+\d{1,3}\b/g
Current Index: 25-36
IF Branch: else
Current Index: 5:
IF Branch: /\d+[ :]+/g
Current Index: 1-
IF Branch: /\d+[-]\B/g
Current Index: 6:
IF Branch: /\d+[ :]+/g
Current Index: 1-26
IF Branch: /\b\d{1,3}[ -]+\d{1,3}\b/g
Current Index: 32-40
IF Branch: else
Current Index: 55
IF Branch: /^\d{1,3}$/g
Current Index: 7:
IF Branch: /\d+[ :]+/g
Current Index: 8
IF Branch: else
As you can tell, the JSON.stringified Array is first, followed by a loop of the indices of the array. For each index, the value and the pattern the index matched are output. You'll notice that when the index value is "25-36", "32-40", and "8" that the else branch is triggered, even though they all clearly match the patterns used. Moreover, in each case, there are preceding indices formatted in exactly the same way that DO trigger the appropriate branch of the if statement.
What on earth is going on? What don't I understand about what is happening here? I am checking to make certain the patterns are correct on regex101, so I am certain they are working. What gives?
Upvotes: 0
Views: 62
Reputation: 224942
You’re creating regular expressions with the global flag, which keep their state across matches:
// A regex with the g flag remembers where its previous match ended
// (lastIndex), so each exec() on the same string resumes from that point
// instead of starting over at index 0.
var re = /\d/g;
re.exec('123') // ['1'] - first digit
re.exec('123') // ['2'] - search resumed after the '1'
re.exec('123') // ['3'] - search resumed after the '2'
re.exec('123') // null - nothing left after the '3'
So, for test:
// test() on a g-flagged regex advances the same saved position as exec(),
// so identical calls on the same string do not all return the same answer.
var re = /\d/g;
re.test('123') // true - found '1'
re.test('123') // true - found '2'
re.test('123') // true - found '3'
re.test('123') // false - no digit after the previous match
Solution: don’t add the global flag when you don’t want it.
Generally simplified:
// Sample verse references to classify.
const output = [
  "1:1-8",
  "3:5",
  "4:-8-10",
  "25-36",
  "5:1-6:1-26",
  "32-40",
  "55",
  "7:8",
];

// Patterns are tried in listed order. None has the g flag, so every test()
// call starts from the beginning of the string it is given.
const patterns = [
  /\d+[ :]+/,
  /\b\d{1,3}[ -]+\d{1,3}\b/,
  /\d+[-]\B/,
  /^\d{1,3}$/,
];

// Report the first pattern that accepts each item (undefined when none does).
for (const item of output) {
  const matched = patterns.find((pattern) => pattern.test(item));
  console.log(`Item: ${item}`);
  console.log(`Matched: ${matched}`);
  console.log();
}
Upvotes: 2