Front End Coder
Front End Coder

Reputation: 468

Extract domain from a string using Javascript?

I have a string which includes many website URLs, but I want to extract only the URL that is outside the brackets [ ].

Can someone correct this?

Note: The output must be www.google.com, and the domain name outside [ ] will not necessarily come at the end of the string.

// Sample input: "|"-separated segments, each beginning with a [[...]] bracketed URL.
// The only URL outside the brackets is www.google.com, in the last segment.
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';

// The string may also contain https, and the TLD may be .in instead of .com.


// Split into the "|"-separated segments (arr is not referenced again in this snippet).
var arr = str.split("|");

/**
 * Tries to match a domain-like prefix at the very start of `str`.
 *
 * The pattern allows an optional http:// or https:// scheme and an optional
 * "www." prefix; capture group 1 holds the part of the host that precedes
 * the trailing suffix.
 *
 * @param {string} str - Text to test; the pattern is anchored at index 0.
 * @returns {?Array} The String.prototype.match result (full match at [0],
 *   capture group at [1]), or null when `str` does not start with a domain.
 */
function domainName(str) {
  var found = str.match(/^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d\.-]+)\.(?:[a-z\.]{2,10})(?:[\w\.-]*)*/);
  // Guard clause: no match (or an empty result) is reported as null.
  if (!found || found.length === 0) {
    return null;
  }
  return found;
}
// Runs the matcher on the whole string, not on the "|"-separated pieces.
// The pattern in domainName is anchored with ^ and str begins with "[[",
// which the pattern cannot match, so this logs null.
var domainname = domainName(str);
var domain = domainname;
console.log(domain);

Upvotes: 2

Views: 321

Answers (5)

Jay Brunet
Jay Brunet

Reputation: 3750

/**
 * Returns the host part of a URL-like string: everything after an optional
 * "scheme://" prefix and before the first "/".
 *
 * @param {string} source - URL with or without a scheme, e.g.
 *   "https://example.com/subdirectory" or "example.com/subdirectory".
 * @returns {string} The host portion, e.g. "example.com".
 */
function extractHost(source) {
  const schemePos = source.indexOf("://"); // works for http and https
  // indexOf returns -1 when there is no scheme; slicing at -1 + 3 would
  // silently drop the first two characters, so only slice when it was found.
  const withoutScheme = schemePos === -1 ? source : source.slice(schemePos + 3);
  return withoutScheme.split("/")[0]; // trim off subdirectories
}

var dirtySource = "https://example.com/subdirectory";
var dirtyPos = dirtySource.indexOf("://"); // position of the scheme separator, -1 if absent
var cleanSource = extractHost(dirtySource);

Upvotes: 0

James
James

Reputation: 66

As CertainPerformance suggests, you can exclude the URLs that are inside [ ] using replace, and then use a regex to extract the domain name. Below is the code:

var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';

/**
 * Finds the first domain that appears outside the [[...]] sections of a
 * "|"-separated string.
 *
 * Every [[...]] section is removed first; each remaining segment is then
 * tested for a domain at its start.
 *
 * @param {string} input - Text containing [[...]] sections and "|" separators.
 * @returns {string} The matched domain (e.g. "www.google.com"), or
 *   "Not Found" when no segment starts with a domain.
 */
function findDomain(input) {
  // No "g" flag: the pattern is anchored with ^, so at most one match per
  // segment is possible and match() can return the usual match object.
  var pattern = /^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d\.-]+)\.(?:[a-z\.]{2,10})(?:[\w\.-]*)*/;
  var segments = input.replace(/\[\[[^[\]]*\]\]/g, '').split("|");

  for (const segment of segments) {
    const match = segment.match(pattern);
    // Stop at the first hit so later non-matching segments cannot overwrite
    // it; the domain is not guaranteed to be in the last segment.
    if (match !== null) {
      return match[0];
    }
  }
  return "Not Found";
}

var domain = findDomain(str);

// document only exists in browsers; fall back to the console elsewhere.
if (typeof document !== "undefined") {
  document.write("Domain is = ", domain);
} else {
  console.log("Domain is = " + domain);
}

 

Upvotes: 1

akshay bagade
akshay bagade

Reputation: 1219

This can be achieved with split:

var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';

// Splitting on "]" yields seven pieces for this input (three "]]" pairs);
// the piece at index 6 is the text after the last closing bracket.
// NOTE: the index is specific to this sample string.
const pieces = str.split("]");
let ans = pieces[6];

// Keep only the part before the first "&".
let [finalAns] = ans.split("&");
console.log(finalAns);

Upvotes: 1

JoshG
JoshG

Reputation: 6745

Two main steps:

  • Create a regular expression that matches your desired pattern.
  • Use String.match()

Example:

// match all URLs
// let regex = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/g;

// match only the URL outside of the brackets
// This is the URL pattern above with one extra trailing group: the URL must be
// followed by one or more characters that are neither "[" nor "]", and then by
// the end of the input or a "[". That trailing text is consumed by the match
// (it is not a lookahead), so it is part of each returned string.
let regex = /(((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?))(([^[\]]+)(?:$|\[))/g;

/**
 * Returns every substring of `input` matched by the module-level `regex`.
 *
 * @param {string} input - Text to search.
 * @returns {?string[]} All full matches (the regex has the "g" flag, so no
 *   capture groups are returned), or null when nothing matches.
 */
function getUrlsFromText(input) {
  return input.match(regex);
}

// Demo: log the matches found in the sample string from the question.
console.log(getUrlsFromText('[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3'));

Note that I borrowed the URL matching part of the regular expression from here. If you don't want the query string to be included (as it is on the google.com match), you can modify the regex as desired.

Upvotes: 1

CertainPerformance
CertainPerformance

Reputation: 370929

Replace all occurrences of [[, followed by non-brackets, followed by ]] with a space:

var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';

// "[[", then any run of non-bracket characters, then "]]".
const bracketedSection = /\[\[[^[\]]*\]\]/g;

// Swap every bracketed section for a single space, leaving only the text
// that sits outside the brackets.
const result = str.replace(bracketedSection, ' ');
console.log(result);

Then you can search for URLs in the replaced string.

Upvotes: 2

Related Questions