Xavia
Xavia

Reputation: 95

Converting anchor hrefs to unique tags using regex and JavaScript

I have this sample data:

<products_database>
<product>
         <id>##1234
         <a name ="toy">toy</a>
         <weight>5kg
         <a href ="#block">block</a>
</product>
<product>
          <id>##56789
          <a name ="brick">brick</a>
          <a name ="lego">lego</a>
          <a name ="block">block</a>
          <weight>2kg
          <a href ="#toy">toy</a>
</product>
<product>
          <id>##1357902
          <a href ="#toy">toy</a>
          <a href ="#brick">brick</a>
          <weight>4kg
</product>
</product_database>

I'd like to convert the hrefs into:

<products_database>
<product>
         <id>##1234
         <a name ="toy">toy</a>
         <weight>5kg
         <..pd ##56789 #block>block</a>
</product>
<product>
          <id>##56789
          <a name ="brick">brick</a>
          <a name ="lego">lego</a>
          <a name ="block">block</a>
          <weight>2kg
          <..pd ##1234 #toy>toy</a>
</product>
<product>
          <id>##1357902
          <..pd ##1234 #toy>toy</a>
          <..pd ##56789 #brick>brick</a>
          <weight>4kg
</product>
</product_database>

Each `<a href>` is to be converted into this unique tag, which carries the `<id>` of the product whose `<a name>` value equals the href target. For example, `<a href ="#block">` becomes `<..pd ##56789 #block>` because `##56789` is the id of the product that contains `<a name ="block">`. I'm only allowed to use JavaScript and regular expressions. Any help would be appreciated.

Upvotes: 1

Views: 44

Answers (2)

Fallenhero
Fallenhero

Reputation: 1583

Or you can use this (mainly regex approach), but I am not sure how error-resistant this is:

var a = '<products_database>\n<product>\n         <id>##1234\n         <a name ="toy">toy</a>\n         <weight>5kg\n         <a href ="#block">block</a>\n</product>\n<product>\n          <id>##56789\n          <a name ="brick">brick</a>\n          <a name ="lego">lego</a>\n          <a name ="block">block</a>\n          <weight>2kg\n          <a href ="#toy">toy</a>\n</product>\n<product>\n          <id>##1357902\n          <a href ="#toy">toy</a>\n          <a href ="#brick">brick</a>\n          <weight>4kg\n</product>\n</product_database>';

/**
 * Replaces every `<a href ="#NAME">` opening tag with `<..pd ID #NAME>`,
 * where ID is the `<id>` value (`##` followed by digits) of the product that
 * contains `<a name ="NAME">`.
 *
 * Hrefs whose target name does not exist anywhere in the text are left as-is.
 *
 * @param {string} text - Product database markup.
 * @returns {string} The markup with all resolvable hrefs converted.
 */
function convertAnchorHrefs(text) {
  // Case 1: the id / name pair appears BEFORE the href that refers to it.
  // `(?:[\s\S](?!<id))*?` keeps the name inside the same product as the id.
  var idBeforeHref = /<id>(##\d+)((?:[\s\S](?!<id))*?<a name =")(.+?)("[\S\s]*?)<a href ="#\3">/g;
  // Case 2: the href appears BEFORE the id / name pair it refers to.
  var hrefBeforeId = /<a href ="#(.+?)">([\S\s]*?)<id>(##\d+)((?:[\s\S](?!<id))*?<a name =")\1"/g;

  var current = text;
  var previous;

  // A single global pass consumes each <id> at most once, so a product that
  // is referenced by several hrefs needs several passes. Repeat until a pass
  // changes nothing; every replacement removes one `<a href`, so this ends.
  do {
    previous = current;
    current = current
      .replace(idBeforeHref, '<id>$1$2$3$4<..pd $1 #$3>')
      .replace(hrefBeforeId, '<..pd $3 #$1>$2<id>$3$4$1"');
  } while (current !== previous);

  return current;
}

a = convertAnchorHrefs(a);

a;

There are two regexes:

1.) matching if the id comes before the href

2.) matching if the id comes after the href

You have to loop, because a single global pass consumes each `<id>` at most once, so an id that is referenced by several hrefs needs several passes before every href is converted.

Please ask if something is not clear.

Upvotes: 1

Whothehellisthat
Whothehellisthat

Reputation: 2152

I tried to figure out what exactly you need things to do, and wrote a solution that might do what you need it to.

//var findProduct = /<product>\s+<id>(##\d+)\s+((?:<a name ="[^"]+">[^<]+<\/a>)+)\s+<weight>(\d+kg)\s+((?:<a href ="#[^"]+">[^<]+<\/a>)+)\s+<\/product>/g;

/**
 * Parses every `<product>` section of the given markup and rebuilds the
 * database, turning each `<a href ="#NAME">text</a>` into
 * `<..pd ID #NAME>text</a>`, where ID is the `<id>` of the product that
 * declares `<a name ="NAME">`.
 *
 * The output is regenerated rather than edited in place: each product is
 * written as its id, its weight, its name anchors and then its links, one
 * per line. Links whose target name is not declared by any product are kept
 * as ordinary `<a href>` anchors.
 *
 * @param {string} data - Product database markup.
 * @returns {string} The rebuilt markup wrapped in `<product_database>`.
 */
function convertProductLinks(data) {
  // Regexes are created per call: /g patterns carry a stateful lastIndex.
  var findProduct = /<product>\s+([\W\w]+?)\s+<\/product>/g;
  var findTag = /<([\w]+)(?: (name|href) ="#?([^"]+)")?>([^<\n\r]+)/g;

  // Prototype-less lookup so names such as "constructor" cannot collide
  // with inherited Object properties.
  var byName = Object.create(null);
  var products = [];
  var productMatch;
  var tagMatch;
  var product;

  // Pass 1: collect every product and index it under each of its names, so
  // that links may point at products defined earlier or later in the text.
  while ((productMatch = findProduct.exec(data)) !== null) {
    product = { id: "", names: [], weight: "", links: [] };

    while ((tagMatch = findTag.exec(productMatch[1])) !== null) {
      // tagMatch: [whole, tagName, attr, attrValue, tagValue]
      switch (tagMatch[1]) {
        case "id":
          product.id = tagMatch[4];
          break;
        case "weight":
          product.weight = tagMatch[4];
          break;
        case "a":
          if (tagMatch[2] === "name") {
            product.names.push({ name: tagMatch[3], text: tagMatch[4] });
            byName[tagMatch[3]] = product;
          } else if (tagMatch[2] === "href") {
            product.links.push({ name: tagMatch[3], text: tagMatch[4] });
          }
          break;
      }
    }

    products.push(product);
  }

  // Pass 2: render each product, resolving links through the name index.
  return "<product_database>" + products.map(function (item) {
    var nameLines = item.names.map(function (anchor) {
      return "<a name =\"" + anchor.name + "\">" + anchor.text + "</a>";
    });
    var linkLines = item.links.map(function (link) {
      var target = byName[link.name];
      if (!target) {
        // No product declares this name: keep the original anchor.
        return "<a href =\"#" + link.name + "\">" + link.text + "</a>";
      }
      return "<..pd " + target.id + " #" + link.name + ">" + link.text + "</a>";
    });

    // Join names and links together so a newline also separates the last
    // name anchor from the first link.
    return "<product><id>" + item.id + "\n<weight>" + item.weight + "\n" +
      nameLines.concat(linkLines).join("\n") +
      "\n</product>";
  }).join("\n") + "</product_database>";
}

// Browser glue: convert the contents of the #data field in place. The guards
// make the script a no-op where there is no DOM or no such element.
var dataField = typeof document !== "undefined" ? document.getElementById("data") : null;
if (dataField) {
  dataField.value = convertProductLinks(dataField.value);
}
  <textarea id="data" cols=50 rows=30><products_database>
<product>
         <id>##1234
         <a name ="toy">toy</a>
         <weight>5kg
         <a href ="#block">block</a>
</product>
<product>
          <id>##56789
          <a name ="brick">brick</a>
          <a name ="lego">lego</a>
          <a name ="block">block</a>
          <weight>2kg
          <a href ="#toy">toy</a>
</product>
<product>
          <id>##1357902
          <a href ="#toy">toy</a>
          <a href ="#brick">brick</a>
          <weight>4kg
</product>
</product_database></textarea>

Upvotes: 1

Related Questions