Reputation: 468
I have a string that includes many website URLs, but I want to extract only the URL that is outside the brackets [ ].
Can someone correct this ?
Note: the output must be www.google.com, and the domain name outside [ ] will not necessarily come at the end of the string.
// Sample input: "|"-separated records, each opening with a [[...]]-wrapped URL.
// Only the host that sits outside the brackets (www.google.com) is wanted.
// Real input may carry an https scheme, and the TLD may be .in instead of .com.
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
var arr = str.split('|');
/**
 * Tries to match a domain at the very start of the given string.
 *
 * @param {string} str - Text to test; an optional http(s):// scheme and a
 *   "www." prefix are accepted before the domain.
 * @returns {?Array} The array produced by String.prototype.match (full match
 *   at index 0, the name before the last matched dot at index 1), or null
 *   when the string does not start with a domain.
 */
function domainName(str) {
  var found = str.match(/^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d\.-]+)\.(?:[a-z\.]{2,10})(?:[\w\.-]*)*/);
  // The pattern is anchored with ^, so text that begins with "[[" never matches.
  return (found == null || found.length === 0) ? null : found;
}
// Run the matcher over the whole input and print what it returns
// (the match array, or null when the input does not start with a domain).
var domainname = domainName(str),
    domain = domainname;
console.log(domain);
Upvotes: 2
Views: 321
Reputation: 3750
/**
 * Extracts the host portion of a URL-like string.
 *
 * @param {string} source - URL with or without a scheme, e.g.
 *   "https://example.com/subdirectory" or "www.example.in/page".
 * @returns {string} The text between the "://" separator (when present) and
 *   the first "/" that follows it.
 */
function extractHost(source) {
  const schemePos = source.indexOf("://"); // matches both http:// and https://
  // indexOf returns -1 when there is no scheme; cutting at -1 + 3 would chop
  // the first two characters off the host, so keep the string whole instead.
  const withoutScheme = schemePos === -1 ? source : source.slice(schemePos + 3);
  return withoutScheme.split("/")[0]; // trim off subdirectories
}

var dirtySource = "https://example.com/subdirectory";
var dirtyPos = dirtySource.indexOf("://"); // -1 when the input carries no scheme
var cleanSource = extractHost(dirtySource);
Upvotes: 0
Reputation: 66
As CertainPerformance suggests, you can exclude the URLs that are inside [ ] using replace, and then extract the domain name with a regex. Below is the code:
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';

/**
 * Finds the first domain that sits outside every [[...]] group.
 *
 * The input is treated as "|"-separated segments. Segments are checked in
 * order and the scan stops at the first hit, so a later segment without a
 * domain cannot overwrite an earlier result with "Not Found".
 *
 * @param {string} input - Raw text containing [[...]] groups and "|" separators.
 * @returns {string} The matched text (an optional scheme and "www." prefix are
 *   included), or "Not Found" when no segment starts with a domain.
 */
function findOutsideDomain(input) {
  // Remove the bracketed URLs up front so they can never be matched.
  const segments = input.replace(/\[\[[^[\]]*\]\]/g, '').split('|');
  // Anchored at the start of a segment. No "g" flag: match() then exposes the
  // full match at index 0 instead of returning a list of matches.
  const ptrn = /^(?:https?:\/\/)?(?:w{3}\.)?([a-z\d.-]+)\.[a-z.]{2,10}[\w.-]*/;
  for (const segment of segments) {
    const hit = segment.match(ptrn);
    if (hit !== null) {
      return hit[0];
    }
  }
  return 'Not Found';
}

var domain = findOutsideDomain(str);
// document.write only exists in browsers; fall back to the console elsewhere.
if (typeof document !== 'undefined') {
  document.write("Domain is = ", domain);
} else {
  console.log(`Domain is = ${domain}`);
}
Upvotes: 1
Reputation: 1219
This can be achieved with split:
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
// Cutting on "]" leaves the text after the third [[...]] group at index 6
// (every "]]" pair contributes one empty piece). This relies on the input
// holding exactly three bracketed groups before the wanted host.
let ans = str.split("]").at(6);
// Keep only what precedes the first "&".
let [finalAns] = ans.split("&");
console.log(finalAns);
Upvotes: 1
Reputation: 6745
Two main steps:
Example:
// Alternative pattern that matches every URL, bracketed or not (kept for reference):
// let regex = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/g;

// Active pattern: the same URL matcher, followed by a run of non-bracket
// characters that must reach either the end of the input or the next "[".
// A URL inside [[...]] is followed by "]" and therefore cannot satisfy it.
let regex = /(((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?))(([^[\]]+)(?:$|\[))/g;

/**
 * Collects every substring of the input that the active pattern matches.
 *
 * @param {string} input - Text to scan.
 * @returns {?string[]} All matches in order of appearance, or null if there are none.
 */
function getUrlsFromText(input) {
  const urls = input.match(regex);
  return urls;
}

console.log(
  getUrlsFromText(
    '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3'
  )
);
Note that I borrowed the URL matching part of the regular expression from here. If you don't want the query string to be included (as it is on the google.com match), you can modify the regex as desired.
Upvotes: 1
Reputation: 370929
Replace all occurrences of [[, followed by non-brackets, followed by ]], with a space:
var str = '[[www.abc.com/corporate/partner/just-a-test]]acdascvdvsa.1563e24e32e42|[[www.abc.com/corporate/partner/just-a-test]]1563e24e32e42.1563e24e32e42|[[www.abc.com/instruments/infrared-guided-measurement/]]www.google.com&1566805689640.1566806059701.3';
// Swap each [[...]] group (no brackets allowed inside) for a single space,
// leaving only the text that sat outside the brackets.
const result = str.replaceAll(/\[\[[^[\]]*\]\]/g, ' ');
console.log(result);
Then you can search for URLs in the replaced string.
Upvotes: 2