Reputation: 1501
I have a JavaScript application which retrieves shortcode strings from a WordPress database. So I may end up with a variable like this:
var shortcode = '[wp-form id="1946" title="My Test Form"]';
I am looking to use pure JavaScript to access the attributes so I can extract the title, etc. I imagine this will be some form of regex and split(), but so far my efforts have been frustrated by splitting on whitespace.
Any ideas greatly appreciated.
Upvotes: 2
Views: 2568
Reputation: 1037
Try to use this code:
/**
 * Collect every name="value" pair found in a shortcode string.
 *
 * Only double-quoted values are recognised. Attribute names may contain word
 * characters and hyphens. A later duplicate of a name overwrites the earlier one.
 *
 * @param {string} code - Shortcode text, e.g. '[wp-form id="1946"]'.
 * @returns {Object<string, string>} Map of attribute name to value; empty when
 *   the shortcode has no attributes.
 */
function parseShortcodeAttributes(code) {
  const pairPattern = /([\w-]+)="(.*?)"/g;
  const result = {};
  let pair;
  // An exec() loop simply ends when nothing matches, whereas match() with /g
  // returns null for an attribute-less shortcode and made the forEach call crash.
  while ((pair = pairPattern.exec(code)) !== null) {
    result[pair[1]] = pair[2];
  }
  return result;
}

const shortcode = '[wp-form id="1946" title="My Test Form" empty=""]';
let attributes = parseShortcodeAttributes(shortcode);
console.log(attributes);
Output:
Object {id: "1946", title: "My Test Form", empty: ''}
Upvotes: 6
Reputation: 20599
Okay, even though I'm late to the party, I'm going to throw an answer in. I'm surprised nobody complained "you can't parse with just a regular expression!" I guess this used to be a much more fashionable comment to make. Anyway, I think it's perfectly reasonable to use just a regex, and I see some reasonable attempts already given.
However, if you want to really parse the tag, here's a quick parser I whipped up.
/**
 * Parse one shortcode tag such as `[wp-form id="1946" title="My Test Form"]`.
 *
 * The input is split into tokens (a run of whitespace, a single non-word
 * character, or a run of word characters) which drive a small state machine.
 * Text before the first "[" and after the closing "]" is ignored.
 *
 * Attribute forms handled:
 *   name="value" / name='value'  quoted; \" \' and \\ are unescaped, any other
 *                                backslash sequence is kept verbatim
 *   name=value                   unquoted; runs to the next whitespace or "]"
 *   name                         valueless; stored as ''
 * Whitespace around "=" is tolerated.
 *
 * @param {string} shortCode - Text containing the shortcode tag.
 * @returns {{name: string, attributes: Object<string, string>}} The tag name
 *   and its attributes.
 * @throws {string} A message string (not an Error object) when "=" appears
 *   where no attribute name precedes it. Strings are kept because existing
 *   callers serialise the caught value directly.
 */
function parseShortCode(shortCode) {
  const re = /(\s+|\W)|(\w+)/g;
  const parsedValue = {
    name: '',
    attributes: {}
  };
  let mode = 'NOT STARTED';
  let curAttribute = '';
  let quoteChar;
  let match;

  while ((match = re.exec(shortCode)) !== null) {
    const token = match[0];
    // A token is either entirely whitespace or contains none at all.
    const isSpace = /\s/.test(token);

    switch (mode) {
      case 'NOT STARTED':
        if (token === '[') {
          mode = 'GETNAME';
        }
        break;

      case 'GETNAME':
        if (token === ']') {
          // Tag without attributes: the bracket closes the tag and is not
          // part of the name.
          mode = 'COMPLETE';
        } else if (!isSpace) {
          parsedValue.name += token;
        } else if (parsedValue.name) {
          mode = 'PARSING';
        }
        break;

      case 'PARSING':
        if (token === ']') {
          mode = 'COMPLETE';
        } else if (token === '=') {
          if (!curAttribute) {
            throw ('invalid token: "' + token + '" encountered at ' + match.index);
          }
          mode = 'GET ATTRIBUTE VALUE';
        } else if (!isSpace) {
          curAttribute += token;
        } else if (curAttribute) {
          // Whitespace after a name: the next token decides whether a value
          // follows ("name = value") or the attribute is valueless.
          mode = 'SET ATTRIBUTE';
        }
        break;

      case 'SET ATTRIBUTE':
        // Whitespace runs are a single token, so the token seen here is
        // normally non-whitespace; the guard below is purely defensive.
        if (token === '=') {
          mode = 'GET ATTRIBUTE VALUE';
        } else if (!isSpace) {
          // No "=" followed, so the pending attribute has no value.
          parsedValue.attributes[curAttribute] = '';
          if (token === ']') {
            curAttribute = '';
            mode = 'COMPLETE';
          } else {
            // The token already starts the next attribute name.
            curAttribute = token;
            mode = 'PARSING';
          }
        }
        break;

      case 'GET ATTRIBUTE VALUE':
        if (isSpace) {
          break;
        }
        if (token === ']') {
          // "name=" immediately followed by the end of the tag.
          parsedValue.attributes[curAttribute] = '';
          curAttribute = '';
          mode = 'COMPLETE';
        } else if (token === '"' || token === "'") {
          quoteChar = token;
          parsedValue.attributes[curAttribute] = '';
          mode = 'GET QUOTED ATTRIBUTE VALUE';
        } else {
          parsedValue.attributes[curAttribute] = token;
          mode = 'GET UNQUOTED ATTRIBUTE VALUE';
        }
        break;

      case 'GET UNQUOTED ATTRIBUTE VALUE':
        // Keep appending so values such as my-val or a/b are not cut off at
        // the first non-word character.
        if (isSpace) {
          curAttribute = '';
          mode = 'PARSING';
        } else if (token === ']') {
          curAttribute = '';
          mode = 'COMPLETE';
        } else {
          parsedValue.attributes[curAttribute] += token;
        }
        break;

      case 'GET QUOTED ATTRIBUTE VALUE':
        if (token === '\\') {
          mode = 'ESCAPE VALUE';
        } else if (token === quoteChar) {
          mode = 'PARSING';
          curAttribute = '';
        } else {
          parsedValue.attributes[curAttribute] += token;
        }
        break;

      case 'ESCAPE VALUE':
        if (/^[\\'"]$/.test(token)) {
          // Escaped backslash or quote: keep only the escaped character.
          parsedValue.attributes[curAttribute] += token;
        } else {
          // Not a recognised escape: keep the backslash as written.
          parsedValue.attributes[curAttribute] += '\\' + token;
        }
        mode = 'GET QUOTED ATTRIBUTE VALUE';
        break;
    }
  }

  // A name still pending at the end of the input (e.g. "[foo bar]") is a
  // valueless attribute. The own-property check keeps names that exist on
  // Object.prototype, such as "constructor", from being skipped.
  if (curAttribute && !Object.prototype.hasOwnProperty.call(parsedValue.attributes, curAttribute)) {
    parsedValue.attributes[curAttribute] = '';
  }
  return parsedValue;
}
/**
 * Parse the text currently in the #shortcode textarea and display the result
 * (or the parse failure) as JSON inside the #result element.
 */
function doUpdate() {
  var text = document.getElementById('shortcode').value;
  var output;
  try {
    output = parseShortCode(text);
  } catch (err) {
    // parseShortCode throws plain strings, which serialise fine. A real Error
    // object would serialise to "{}", so show its message instead.
    output = err instanceof Error ? err.message : err;
  }
  // textContent rather than innerHTML: the JSON echoes user-typed text, and
  // JSON.stringify does not escape "<", so innerHTML would parse it as markup.
  document.getElementById('result').textContent = JSON.stringify(output);
}
// Re-run the parser on every click of the Update button, and once right away
// so the textarea's initial content is parsed without any interaction.
const updateButton = document.getElementById('updateBtn');
updateButton.addEventListener('click', doUpdate);
doUpdate();
<!-- Demo markup: the script reads #shortcode, is triggered by #updateBtn, and writes the JSON result into #result. -->
<label for="shortcode">Short Code:</label>
<textarea id="shortcode" style="width:100%; height:60px">[wp-form id="1946" title="My Test Form"]</textarea>
<div>
<button id="updateBtn">Update</button>
</div>
<div>
<pre id="result"></pre>
</div>
I'm sure this has bugs, but I got it to work with your case, and some cases the other answers couldn't handle. Unless shortcodes get really intense, I'd just stick with a regex. But if you encounter stuff like unquoted attribute values and empty attributes this might work for you.
Upvotes: 3
Reputation: 89629
Don't try to use String.prototype.split in this case. Instead, describe an attribute together with its value as a pattern, and match each occurrence using RegExp.prototype.exec:
// Group 1 captures the attribute name, group 2 the text between the double quotes.
var re = /([\w-]+)="([^"]*)"/g;
var str = '[wp-form id="1946" title="My Test Form"]';
var m;
// With the g flag, exec() resumes from re.lastIndex, so calling it repeatedly
// walks through every attribute until it returns null.
for (m = re.exec(str); m !== null; m = re.exec(str)) {
  console.log(`${m[1]}\n${m[2]}\n`);
}
Upvotes: 1
Reputation: 399
This can be done simply with a regex:
/**
 * Look up a single double-quoted attribute of a shortcode by name.
 *
 * Each attribute is searched for independently, so the order of attributes in
 * the shortcode does not matter and a missing attribute does not cause an error.
 *
 * @param {string} code - Shortcode text, e.g. '[wp-form id="1946"]'.
 * @param {string} name - Attribute name to look for.
 * @returns {?string} The value of the first matching attribute, or null when
 *   the attribute is absent.
 */
function getShortcodeAttribute(code, name) {
  // Escape regex metacharacters so the name is matched literally.
  var escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Require start of string, whitespace or "[" before the name so that "id"
  // is not found inside a longer name such as "grid".
  var pattern = new RegExp('(?:^|[\\s\\[])' + escapedName + '="([^"]*)"');
  var found = pattern.exec(code);
  return found ? found[1] : null;
}

var shortcode = '[wp-form id="1946" title="My Test Form"]';
// Extract id and title.
var id = getShortcodeAttribute(shortcode, 'id');
var title = getShortcodeAttribute(shortcode, 'title');
Upvotes: 0