Bruno
Bruno

Reputation: 9017

Check if a JavaScript string is a URL

Is there a way in JavaScript to check if a string is a URL?

RegExes are excluded because the URL is most likely written like stackoverflow; that is to say that it might not have a .com, www or http.

Upvotes: 586

Views: 768186

Answers (30)

Sachin De Silva
Sachin De Silva

Reputation: 30

/**
 * Reports whether `string` parses as an absolute URL using the HTTP or HTTPS scheme.
 *
 * @param {string} string - Candidate URL text.
 * @returns {boolean} `true` when `new URL(string)` succeeds and its protocol is
 *   `http:` or `https:`; `false` otherwise, including when parsing throws.
 */
function isValidHttpUrl(string) {
  let parsed;
  try {
    parsed = new URL(string);
  } catch (parseError) {
    // The URL constructor throws on anything it cannot parse.
    return false;
  }
  return ['http:', 'https:'].includes(parsed.protocol);
}

console.log(isValidHttpUrl('https://www.google.com/')); // true
console.log(isValidHttpUrl('mailto://[email protected]')); // false
console.log(isValidHttpUrl('google')); // false

Upvotes: -1

Aral Roca
Aral Roca

Reputation: 5889

You can use the URL native API:

  // Returns true when the URL constructor accepts `string`, false when it throws.
  const isUrl = (string) => {
    try {
      new URL(string);
    } catch (parseError) {
      return false;
    }
    return true;
  };

Updated:

Today there is the native API URL.canParse(url, base) and no non-native implementation is needed: https://developer.mozilla.org/en-US/docs/Web/API/URL/canParse_static.

Upvotes: 32

Harsh Patel
Harsh Patel

Reputation: 1324

Using URL Class

/**
 * Checks a value by handing it to the `URL` constructor.
 *
 * The constructor only accepts absolute URLs, so a value without a scheme
 * (e.g. `test.com`) is reported as invalid.
 *
 * @param {*} urlStr - Value to check.
 * @returns {boolean} `true` if `new URL(urlStr)` succeeds, `false` if it throws.
 */
function testWithUrlClass(urlStr) {
  let accepted = false;
  try {
    accepted = Boolean(new URL(urlStr));
  } catch (parseError) {
    accepted = false;
  }
  return accepted;
}

Using a regex: this is difficult to do with a pure regex because URLs have many 'inconveniences'.

/**
 * Regex-based URL check that accepts URLs with or without an http(s) scheme.
 *
 * Accepted hosts are a dotted domain name, a dotted-quad IPv4 address, or
 * `localhost`, optionally followed by a 1-5 digit port, a path, a query string
 * and a fragment. Matching is case-insensitive.
 *
 * Reference: https://stackoverflow.com/a/54620350/14344959
 *
 * @param {*} str - Value to test; non-strings are coerced by `RegExp#test`.
 * @returns {boolean} `true` when the whole value matches the pattern.
 */
function isValidUrl(str) {
    const urlPattern = new RegExp(
        '^(https?:\\/\\/)?' + // protocol
        // Each label is matched with a single optional tail (`?`, not `*`): it
        // accepts the same labels but cannot backtrack exponentially.
        '((([a-z\\d]([a-z\\d-]*[a-z\\d])?)\\.)+[a-z]{2,}' + // domain name
        '|((\\d{1,3}\\.){3}\\d{1,3})' + // OR IP (v4) address
        '|localhost)' + // OR localhost
        '(\\:\\d{1,5})?' + // port (digits limit 1 to 5)
        // The `+-:` range inside the class already covers `/`, so one optional
        // group matches every path the repeated group did, without the
        // ambiguity that made runs of slashes blow up.
        '(\\/[a-zA-Z\\&\\d%_.~+-:@]*)?' + // path
        '(\\?[a-zA-Z\\&\\d%_.,~+-:@=;&]*)?' + // query string
        '(\\#[-a-zA-Z&\\d_]*)?$', // fragment locator
        'i' // host names and schemes are case-insensitive
    );

    return urlPattern.test(str);
}

Valid Url Cases:

  1. only domain name
  2. full url with fragment also
  3. a URL without the http or https protocol
  4. a URL without 'www.' before the domain
  5. url has sub domain

Run the snippet below, or run it on jsFiddle, to see the output of this regex method.

In the snippet and the jsFiddle link you can also compare the output of the regex approach above with the URL() class (i.e. whether the URL class considers the URL valid).

I hope you like this...

:) Happy Coding!

/**
 * Regex-based URL check that accepts URLs with or without an http(s) scheme.
 *
 * Accepted hosts are a dotted domain name, a dotted-quad IPv4 address, or
 * `localhost`, optionally followed by a 1-5 digit port, a path, a query string
 * and a fragment. Matching is case-insensitive.
 *
 * Reference: https://stackoverflow.com/a/54620350/14344959
 *
 * @param {*} str - Value to test; non-strings are coerced by `RegExp#test`.
 * @returns {boolean} `true` when the whole value matches the pattern.
 */
function isValidUrl(str) {
    const urlPattern = new RegExp(
        '^(https?:\\/\\/)?' + // protocol
        // Each label is matched with a single optional tail (`?`, not `*`): it
        // accepts the same labels but cannot backtrack exponentially.
        '((([a-z\\d]([a-z\\d-]*[a-z\\d])?)\\.)+[a-z]{2,}' + // domain name
        '|((\\d{1,3}\\.){3}\\d{1,3})' + // OR IP (v4) address
        '|localhost)' + // OR localhost
        '(\\:\\d{1,5})?' + // port (digits limit 1 to 5)
        // The `+-:` range inside the class already covers `/`, so one optional
        // group matches every path the repeated group did, without the
        // ambiguity that made runs of slashes blow up.
        '(\\/[a-zA-Z\\&\\d%_.~+-:@]*)?' + // path
        '(\\?[a-zA-Z\\&\\d%_.,~+-:@=;&]*)?' + // query string
        '(\\#[-a-zA-Z&\\d_]*)?$', // fragment locator
        'i' // host names and schemes are case-insensitive
    );

    return urlPattern.test(str);
}

// Table of [input, expected result] pairs fed to the validators below.
// Every entry must be a two-element array: the consumer reads e[0] and e[1].
const testLinks = [
  ['test', false],
  ['test.com', true],
  ['http://test.com', true],
  ['www.test.com', true],
  ['http://www.test.com', true],
  ['test.com/products', true],
  ['help.test.com', true],
  ['www.help.test.com', true],
  ['http://example.com', true],
  ['https://www.example.com/path', true],
  ['www.example.com', true],
  ['example.com/path', true],
  ['example.com?query=param', true],
  ['ftp://example.com', false],
  ['http://192.168.0.1', true],
  ['http://192.168.0.1:8080', true],
  ['http://example.com#section', true],
  ['http://example.com:8080/path?query=param', true],
  ['/product/sas.png', false],
  ['?query=param', false],
  ['#section', false],
  ['http://example.com?param=123&name=John', true],
  ['http://example.com?param1=123&param2=456', true],
  ['https://example.com/path?query=param#section', true],
  ['?name=John&age=30', false],
  ['#section-content', false],
  ['http://example.com#', true],
  ['https://example.com/path?', true],
  ['', false], // Empty string
  [null, false], // Null value
  [undefined, false], // Undefined value

  // additional urls
  ['http://en.wikipedia.org/wiki/Procter_&_Gamble', true],
  ['https://sdfasd', false],
  ['http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707', true],

  ['https://stackoverflow.com/', true],
  ['https://w', false],
  ['aaa', false],
  ['aaaa', false],
  ['oh.my', true],
  ['dfdsfdsfdfdsfsdfs', false],
  ['google.co.uk', true],
  // (A stray bare string used to sit here; it was read as input '-' with
  // expected value 'h'. The real case is the '-hyphen-start.gov.tr' row below.)
  ['test-domain.MUSEUM', true],
  ['-hyphen-start.gov.tr', false],
  ['hyphen-end-.com', false],
  ['https://sdfasdp.international', true],
  ['https://sdfasdp.pppppppp', false],
  ['https://sdfasdp.ppppppppppppppppppp', false],
  ['https://sdfasd', false],
  ['https://sub1.1234.sub3.sub4.sub5.co.uk/?', true],
  ['http://www.google-com.123', false],
  ['http://my--testdomain.com', false],
  ['http://my2nd--testdomain.com', true],
  ['http://thingiverse.com/download:1894343', true],
  ['https://medium.com/@techytimo', true],
  ['http://localhost', true],
  ['localhost', true],
  ['localhost:8080', true],
  ['localhost:65536', true],
  ['localhost:80000', false],
  ['magnet:?xt=urn:btih:123', true],
  ['test:8080', false],
  ['test.com:8080', false],
];

// Inputs the regex validator accepted (kept around for optional debugging).
const validUrlResults = [];

const resElem = document.getElementById('result');
var resStr = '';

// Build one result row per test link: the input, the URL-class verdict, the
// regex verdict, and whether the regex verdict matched the expected value.
for (const entry of testLinks) {
  const candidate = entry[0];
  const expected = entry[1];
  const testResult = isValidUrl(candidate);

  if (testResult === true) {
    validUrlResults.push(candidate);
  }

  const verdict = testResult === expected ? 'passed' : 'failed';
  resStr += `<li><span class="input">${candidate}</span> <span class="outputBool">${testWithUrlClass(candidate)}</span> <span class="outputBool">${testResult}</span>  <span>${verdict}</span></li>`;
}

// Append all rows after the existing header row in a single DOM write.
resElem.innerHTML += resStr;

// console.log(validUrlResults);

/**
 * Checks a value by handing it to the `URL` constructor.
 *
 * The constructor only accepts absolute URLs, so a value that does not start
 * with a scheme (e.g. http:// or https://) is reported as invalid.
 *
 * @param {*} urlStr - Value to check.
 * @returns {boolean} `true` if `new URL(urlStr)` succeeds, `false` if it throws.
 */
function testWithUrlClass(urlStr) {
  let isParseable = true;
  try {
    new URL(urlStr);
  } catch (parseError) {
    isParseable = false;
  }
  return isParseable;
}
li {
  margin-bottom: 15px;
}
.input {
  display: inline-block;
  width: 40%;
  word-break: break-all;
}

.outputBool {
 display: inline-block;
  width: 15%;
}
<ul id="result">
  <li>
    <span class="input"><b>Inputs</b></span>
    <span class="outputBool"><b>With Url Class</b></span>
    <span class="outputBool"><b>With Regex</b></span>
    <span><b>Regex passed as Expected?</b></span>
  </li>
</ul>

Upvotes: 1

Diego Ferri
Diego Ferri

Reputation: 2787

Since Node v19.9.0 you can use URL.canParse(input, [base]).

  • input: <string> The absolute or relative input URL to parse. If input is relative, then base is required. If input is absolute, the base is ignored. If input is not a string, it is converted to a string first.
  • base: <string> The base URL to resolve against if the input is not absolute. If base is not a string, it is converted to a string first.

https://nodejs.org/api/url.html#urlcanparseinput-base

URL.canParse('https://example.org/foo'); // true
URL.canParse('nothttps://example.org/foo'); // also true: "nothttps:" is a syntactically valid scheme, so the URL parses

Browser support is (very) limited at the moment. See https://developer.mozilla.org/en-US/docs/Web/API/URL/canParse_static

Upvotes: 4

Ronnie Smith
Ronnie Smith

Reputation: 18555

call this function. If the string is not a valid URL a meaningful error occurs.

/**
 * Parses `string` as an absolute URL and returns its serialized form.
 *
 * @param {string} string - URL text to parse.
 * @returns {string} The `href` of the parsed URL.
 * @throws {TypeError} Propagated from the `URL` constructor when `string`
 *   cannot be parsed.
 */
function encodedURL(string) {
  const { href } = new URL(string);
  return href;
}

console.log(encodedURL("http://www.example.com?q=<foo>"));
console.log(encodedURL("www.example.com"));

So, for example:

Upvotes: -1

Pavlo
Pavlo

Reputation: 44889

If you want to check whether a string is valid HTTP URL, you can use URL constructor (it will throw on malformed string):

/**
 * Reports whether `string` is a well-formed absolute HTTP or HTTPS URL.
 *
 * @param {string} string - Candidate URL text.
 * @returns {boolean} `true` when the `URL` constructor accepts the value and
 *   the resulting protocol is `http:` or `https:`; `false` otherwise.
 */
function isValidHttpUrl(string) {
  try {
    const { protocol } = new URL(string);
    return protocol === "http:" || protocol === "https:";
  } catch (_) {
    // Malformed input makes the constructor throw.
    return false;
  }
}

Note: Per RFC 3986, a URL must begin with a scheme (not limited to http/https), e.g.:

  • www.example.com is not valid URL (missing scheme)
  • javascript:void(0) is valid URL, although not an HTTP one
  • http://.. is valid URL with the host being .. (whether it resolves depends on your DNS)
  • https://example..com is valid URL, same as above

Upvotes: 686

Denys Sokolov
Denys Sokolov

Reputation: 83

/**
 * Validates a URL using an `<input type="url" required>` element.
 *
 * The input element is created on first use and cached on
 * `window._check_input`, so later calls reuse it.
 *
 * @param {string} _url - Value to validate.
 * @returns {boolean} `true` when the input's `checkValidity()` is truthy.
 */
function isURL(_url)
{
    const globalScope = window;

    // Build the shared probe element only once.
    if (!globalScope._check_input)
    {
        const probe = document.createElement("input");
        probe.type = "url";
        probe.required = true;

        globalScope._check_input = probe;
    }

    const probeInput = globalScope._check_input;
    probeInput.value = _url;

    return probeInput.checkValidity() ? true : false;
}

Upvotes: 0

Tom Gullen
Tom Gullen

Reputation: 61729

A related question with an answer

Or this Regexp from Devshed:

/**
 * Regex-based URL check (scheme optional, case-insensitive).
 *
 * Accepts an optional http(s) scheme, a dotted domain name or IPv4 address,
 * and optional port, path, query string and fragment.
 *
 * @param {*} str - Value to test; non-strings are coerced by `RegExp#test`.
 * @returns {boolean} `true` when the whole value matches the pattern.
 */
function validURL(str) {
  var pattern = new RegExp('^(https?:\\/\\/)?'+ // protocol
    // The label tail is optional (`?`) rather than repeated (`*`). Both forms
    // accept exactly the same labels, but the nested `(...*...)*` could be
    // split in exponentially many ways and hang on long non-matching input.
    '((([a-z\\d]([a-z\\d-]*[a-z\\d])?)\\.)+[a-z]{2,}|'+ // domain name
    '((\\d{1,3}\\.){3}\\d{1,3}))'+ // OR ip (v4) address
    '(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*'+ // port and path
    '(\\?[;&a-z\\d%_.~+=-]*)?'+ // query string
    '(\\#[-a-z\\d_]*)?$','i'); // fragment locator
  return pattern.test(str);
}

Upvotes: 413

user6434796
user6434796

Reputation:

Here is yet another method.

// ***note***: if the incoming value is empty(""), the function returns true

// Shared <input type="url"> element, created on the first non-empty check.
var elm;

/**
 * Validates a URL through the validity state of an `<input type="url">`.
 *
 * @param {string} u - Value to validate.
 * @returns {boolean} `false` for the empty string; otherwise the input
 *   element's `validity.valid` after its value is set to `u`.
 */
function isValidURL(u){
  // The empty string is rejected up front (see the note preceding this snippet).
  if (u === "") {
    return false;
  }

  if (!elm) {
    elm = document.createElement('input');
    elm.setAttribute('type', 'url');
  }

  elm.value = u;
  return elm.validity.valid;
}

console.log(isValidURL(''));
console.log(isValidURL('http://www.google.com/'));
console.log(isValidURL('//google.com'));
console.log(isValidURL('google.com'));
console.log(isValidURL('localhost:8000'));

Upvotes: 16

LNT
LNT

Reputation: 896

This is an extension of @Pavlo's answer.

/**
 * Stricter HTTP(S) URL check: the input must parse as an http/https URL and
 * must already equal either the parser's serialized `href` or its `origin`.
 *
 * @param {string} string - Candidate URL text.
 * @returns {boolean} `true` only when both conditions hold.
 */
function isValidHttpUrl(string) {
  let parsed;
  try {
    parsed = new URL(string);
  } catch (_) {
    return false;
  }

  const isHttpScheme = parsed.protocol === "http:" || parsed.protocol === "https:";
  if (!isHttpScheme) {
    return false;
  }

  // Loose equality is kept on purpose so coercible inputs compare as before.
  return parsed.href == string || parsed.origin == string;
}

Try the following:

  • isValidHttpUrl("https:ewe/dsdsd");
  • isValidHttpUrl("https://ewe/dsdsd");

Tested in Chrome

Upvotes: -3

kavitha Reddy
kavitha Reddy

Reputation: 3333

Validating a URL with JavaScript is shown below:

/**
 * Tests whether `str` contains an http(s) URL and alerts the user if not.
 *
 * The pattern is not anchored, so a match anywhere in the string counts.
 *
 * @param {string} str - Text to check.
 * @returns {boolean} `true` on a match; otherwise shows an alert and returns `false`.
 */
function ValidURL(str) {
  const urlPattern = /(?:https?):\/\/(\w+:?\w*)?(\S+)(:\d+)?(\/|\/([\w#!:.?+=&%!\-\/]))?/;
  if (urlPattern.test(str)) {
    return true;
  }
  alert("Please enter valid URL.");
  return false;
}

Upvotes: 52

Woonder
Woonder

Reputation: 321

I have reviewed all the comments, notes and remarks in this topic and have made a new regular expression:

^((javascript:[\w-_]+(\([\w-_\s,.]*\))?)|(mailto:([\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+\.)*[\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+@([\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+\.)*[\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+)|(\w+:\/\/(([\w\u00C0-\u1FFF\u2C00-\uD7FF-]+\.)*([\w\u00C0-\u1FFF\u2C00-\uD7FF-]*\.?))(:\d+)?(((\/[^\s#$%^&*?]+)+|\/)(\?[\w\u00C0-\u1FFF\u2C00-\uD7FF:;&%_,.~+=-]+)?)?(#[\w\u00C0-\u1FFF\u2C00-\uD7FF-_]+)?))$

You can test and improve it here https://regexr.com/668mt .

I checked this expression on next values:

http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707
http://192.168.0.4:55/
https://web.archive.org/web/20170817095211/https://github.com/Microsoft/vscode/issues/32405
http://www.example.com
javascript:void()
http://.
https://example.
https://en.m.wikipedia.org/wiki/C_Sharp_(programming_language)
http://zh.wikipedia.org/wiki/Wikipedia:关于中文维基百科/en?a#a
https://medium.com/@User_name/
https://test-test-test-test-test-test-test-test-test.web.app/
http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707
https://sdfasdp.ppppppppppp
mailto:user@example.com
https://тест.юа

Upvotes: 3

Another way is to use the Node.js DNS module.

The DNS module provides a way of performing name resolutions, and with it you can verify if the url is valid.

const dns = require('dns');
const url = require('url');

const lookupUrl = "https://stackoverflow.com";
const parsedLookupUrl = url.parse(lookupUrl);

// Resolve the bare host name. `hostname` is used instead of `host` because
// `host` keeps any ":port" suffix, which is not a resolvable name.
// When the input has no scheme the parser puts the text in `path`.
const lookupTarget = parsedLookupUrl.protocol
  ? parsedLookupUrl.hostname
  : parsedLookupUrl.path;

// Report the URL as valid only when the name resolved to an address.
dns.lookup(lookupTarget, (error, address, family) => {
  console.log(error || !address ? lookupUrl + ' is an invalid url!'
    : lookupUrl + ' is a valid url: ' + ' at ' + address);
});

That way you can check if the url is valid and if it exists

Upvotes: 0

Mark Anthony Libres
Mark Anthony Libres

Reputation: 1054

You can use ajax request to check if a string is valid url and accessible

// Immediately-invoked wrapper: registers a change handler on every <input>
// that probes the typed value with an ajax request and logs the outcome.
(function() {



// Runs each time an input's value changes; `this` is the changed input.
$("input").change(function() {

// Issue the request to the typed URL with dataType "jsonp".
const check = $.ajax({
        url : this.value,
        dataType: "jsonp"
});

// Request succeeded.
check.then(function() {
   console.log("Site is valid and registered");
});

//expected output
// Request was rejected: a rejection carrying status 200 is still logged as
// valid; anything else is logged as not valid.
// NOTE(review): presumably status 200 here means the site answered but the
// response was not usable as JSONP — confirm against jQuery's behavior.
check.catch(function(reason) {
    if(reason.status === 200) {
        return console.log("Site is valid and registered");
    }
    console.log("Not a valid site");
})

});

})()
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<input type="text" placeholder="Please input url to check ? ">

Upvotes: 0

Crashalot
Crashalot

Reputation: 34503

This function disallows localhost and only allows URLs for web pages (ie, only allows http or https protocol).

It also only allows safe characters as defined here: https://www.urlencoder.io/learn/

/**
 * Checks that `url` is a single http(s) web-page URL with a dotted host
 * (so `localhost` is rejected) made only of the allowed URL characters.
 *
 * @param {*} url - Value to test; non-strings are coerced by `RegExp#test`.
 * @returns {boolean} `true` when the entire value matches the pattern.
 */
function isValidWebUrl(url) {
   // No `m` flag: with it `^`/`$` match at line breaks, so multi-line input
   // passed as soon as any one line looked like a URL. No `g` flag either:
   // it only adds `lastIndex` state, which a one-shot test does not need.
   const regEx = /^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/;
   return regEx.test(url);
}

Upvotes: 16

Bruno Finger
Bruno Finger

Reputation: 2563

There's a lot of answers already, but here's another contribution: Taken directly from the URL polyfill validity check, use an input element with type="url" to take advantage of the browser's built-in validity check:

// Let the browser's built-in validation for <input type="url"> judge the value.
// NOTE(review): `doc` and `url` are not defined in this fragment — presumably
// the document object and the candidate URL string from the surrounding
// polyfill code; confirm against the linked source.
var inputElement = doc.createElement('input');
inputElement.type = 'url';
inputElement.value = url;

// checkValidity() returning false means the value was not accepted as a URL.
if (!inputElement.checkValidity()) {
    throw new TypeError('Invalid URL');
}

Source

Upvotes: 8

J-a-n-u-s
J-a-n-u-s

Reputation: 1587

2020 Update. To expand on both excellent answers from @iamnewton and @Fernando Chavez Herrera, I've started to see @ being used in the path of URLs.

So the updated regex is:

RegExp('(https?:\\/\\/)?((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|((\\d{1,3}\\.){3}\\d{1,3}))(\\:\\d+)?(\\/[-a-z\\d%_.~+@]*)*(\\?[;&a-z\\d%_.~+=-]*)?(\\#[-a-z\\d_]*)?$', 'i');

If you want to allow it in the query string and hash, use:

RegExp('(https?:\\/\\/)?((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|((\\d{1,3}\\.){3}\\d{1,3}))(\\:\\d+)?(\\/[-a-z\\d%_.~+@]*)*(\\?[;&a-z\\d%_.~+=-@]*)?(\\#[-a-z\\d_@]*)?$', 'i');

That being said, I'm not sure if there's a whitepaper rule disallowing @ in the query string or hash.

Upvotes: 0

greg.arnott
greg.arnott

Reputation: 1642

There are a couple of tests using the URL constructor which do not delineate whether the input is a string or URL object.

/**
 * Tests whether a value is a URL object (not a URL string).
 *
 * @param {*} url - Value to inspect.
 * @returns {boolean} `true` when the value's default string tag is "[object URL]".
 */
function isURL(url) {
    const typeTag = Object.prototype.toString.call(url);
    return typeTag === "[object URL]";
}

/**
 * Tests whether the input is both a string and a valid URL.
 *
 * @param {*} url - Value to inspect.
 * @returns {boolean} `true` when the value is tagged "[object String]" and the
 *   `URL` constructor accepts it; `false` otherwise, including on any throw.
 */
function isUrl(url) {
    try {
        if (Object.prototype.toString.call(url) !== "[object String]") {
            return false;
        }
        new URL(url);
        return true;
    } catch (_) {
        return false;
    }
}

Upvotes: 3

Ashish Gupta
Ashish Gupta

Reputation: 1241

I changed the function to use match() and adjusted the slashes, and it works for both http:// and https://:

/**
 * Reports whether `userInput` contains something shaped like a URL
 * (scheme and "www." optional). The pattern is unanchored, so a match
 * anywhere in the text counts.
 *
 * @param {string} userInput - Text to search.
 * @returns {boolean} `true` when at least one match is found.
 */
function isValidUrl(userInput) {
    const urlLike = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g;
    const matches = userInput.match(urlLike);
    return !(matches == null);
}

Upvotes: 2

Guido Flohr
Guido Flohr

Reputation: 2340

Mathias Bynens has compiled a list of well-known URL regexes with test URLs. There is little reason to write a new regular expression; just pick an existing one that suits you best.

But the comparison table for those regexes also shows that it is next to impossible to do URL validation with a single regular expression. All of the regexes in Bynens' list produce false positives and false negatives.

I suggest that you use an existing URL parser (for example new URL('http://www.example.com/') in JavaScript) and then apply the checks you want to perform against the parsed and normalized form of the URL resp. its components. Using the JavaScript URL interface has the additional benefit that it will only accept such URLs that are really accepted by the browser.

You should also keep in mind that technically incorrect URLs may still work. For example http://w_w_w.example.com/, http://www..example.com/, http://123.example.com/ all have an invalid hostname part but every browser I know will try to open them without complaints, and when you specify IP addresses for those invalid names in /etc/hosts such URLs will even work, but only on your computer.

The question is, therefore, not so much whether a URL is valid, but rather which URLs work and should be allowed in a particular context.

If you want to do URL validation there are a lot of details and edge cases that are easy to overlook:

  • URLs may contain credentials as in http://user:password@example.com/.
  • Port numbers must be in the range of 0-65535, but you may still want to exclude the wildcard port 0.
  • Port numbers may have leading zeros as in http://www.example.com:000080/.
  • IPv4 addresses are by no means restricted to 4 decimal integers in the range of 0-255. You can use one to four integers, and they can be decimal, octal or hexadecimal. The URLs https://010.010.000010.010/, https://0x8.0x8.0x0008.0x8/, https://8.8.2056/, https://8.526344/, https://134744072/ are all valid and just creative ways of writing https://8.8.8.8/.
  • Allowing loopback addresses (http://127.0.0.1/), private IP addresses (http://192.168.1.1), link-local addresses (http://169.254.100.200) and so on may have an impact on security or privacy. If, for instance, you allow them as the address of user avatars in a forum, you cause the users' browsers to send unsolicited network requests in their local network and in the internet of things such requests may cause funny and not so funny things to happen in your home.
  • For the same reasons, you may want to discard links to not fully qualified hostnames, in other words hostnames without a dot.
  • But hostnames may always have a trailing dot (like in http://www.stackoverflow.com.).
  • The hostname portion of a link may contain square brackets for IPv6 addresses as in http://[::1].
  • IPv6 addresses also have ranges for private networks or link-local addresses etc.
  • If you block certain IPv4 addresses, keep in mind that for example https://127.0.0.1 and https://[::ffff:127.0.0.1] point to the same resource (if the loopback device of your machine is IPv6 ready).
  • The hostname portion of URLs may now contain Unicode, so that the character range [-0-9a-zA-Z] is definitely no longer sufficient.
  • Many registries for top-level domains define specific restrictions, for example on the allowed set of Unicode characters. Or they subdivide their namespace (like co.uk and many others).
  • Top-level domains must not contain decimal digits, and the hyphen is not allowed unless for the IDN A-label prefix "xn--".
  • Unicode top-level domains (and their punycode encoding with "xn--") must still contain only letters but who wants to check that in a regex?

Which of these limitations and rules apply is a question of project requirements and taste.

I have recently written a URL validator for a web app that is suitable for user-supplied URLs in forums, social networks, or the like. Feel free to use it as a base for your own one:

I have also written a blog post The Gory Details of URL Validation with more in-depth information.

Upvotes: 4

Mark Hetherington
Mark Hetherington

Reputation: 1691

If you need to also support https://localhost:3000 then use this modified version of [Devshed]s regex.

    /**
     * Regex-based URL check that also accepts `localhost` as a host.
     *
     * Accepts an optional http(s) scheme, then a dotted domain name, an IPv4
     * address or `localhost`, then optional port, path, query and fragment.
     *
     * @param {*} url - Value to test; falsy values are rejected immediately.
     * @returns {boolean} `true` when the whole value matches the pattern.
     */
    function isURL(url) {
        if(!url) return false;
        var pattern = new RegExp('^(https?:\\/\\/)?'+ // protocol
            // All three host forms live in ONE group. With `|localhost` outside
            // the group the pattern split into two half-anchored alternatives,
            // so "http://example.com <anything>" and "xxlocalhost" both passed.
            // The label tail uses `?` instead of `*`: same labels, but no
            // exponential backtracking on long non-matching input.
            '((([a-z\\d]([a-z\\d-]*[a-z\\d])?)\\.)+[a-z]{2,}|'+ // domain name
            '((\\d{1,3}\\.){3}\\d{1,3})|' + // OR ip (v4) address
            'localhost)' + // OR localhost
            '(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*'+ // port and path
            '(\\?[;&a-z\\d%_.~+=-]*)?'+ // query string
            '(\\#[-a-z\\d_]*)?$', 'i'); // fragment locator
        return pattern.test(url);
    }

Upvotes: 2

user8094098
user8094098

Reputation:

This is definitely not the most effective approach, but it is readable and easy to adapt to whatever you need. And it's easier to add regex/complexity from here. So here is a very pragmatic approach:

// Prefixes a candidate must start with to be considered at all.
const validFirstBits = ["ftp://", "http://", "https://", "www."];
// Substrings that disqualify a candidate wherever they appear.
const invalidPatterns = [" ", "//.", ".."];

/**
 * Pragmatic, heuristic URL check.
 *
 * A word passes when it is at least 8 characters long, starts with one of the
 * known prefixes, contains none of the forbidden substrings, has at least one
 * dot, and the text after the last dot is either longer than one character or
 * a single digit.
 *
 * @param {string} word - Candidate text.
 * @returns {boolean} Whether the word looks like a URL under these rules.
 */
export function isUrl(word) {
    // Anything shorter than "www.1.dk" is rejected outright.
    if (!word || word.length < 8) {
        return false;
    }

    const startsWithKnownPrefix = validFirstBits.some(
        (prefix) => word.indexOf(prefix) === 0,
    );
    if (!startsWithKnownPrefix) {
        return false;
    }

    const containsForbiddenPattern = invalidPatterns.some(
        (forbidden) => word.indexOf(forbidden) !== -1,
    );
    if (containsForbiddenPattern) {
        return false;
    }

    const segments = word.split(".");
    if (segments.length <= 1) {
        return false;
    }

    // Text after the final dot; empty when the word ends with a dot.
    const tail = segments[segments.length - 1];
    if (!tail) {
        return false;
    }
    if (tail.length > 1) {
        return true;
    }
    // A one-character tail is only acceptable when it is a digit.
    return !Number.isNaN(Number.parseInt(tail, 10));
}

TEST:

import { isUrl } from "./foo";

// Spec for isUrl: every listed valid URL must be accepted and every listed
// invalid URL rejected. Each expectation compares against the URL itself so
// a failure message shows which input misbehaved.
describe("Foo", () => {
    test("should validate correct urls correctly", () => {
        const validUrls = [
            "http://example.com",
            "http://example.com/blah",
            "http://127.0.0.1",
            "http://127.0.0.1/wow",
            "https://example.com",
            "https://example.com/blah",
            "https://127.0.0.1:1234",
            "ftp://example.com",
            "ftp://example.com/blah",
            "ftp://127.0.0.1",
            "www.example.com",
            "www.example.com/blah",
        ];

        for (const url of validUrls) {
            expect(isUrl(url) && url).toEqual(url);
        }
    });

    test("should validate invalid urls correctly", () => {
        const inValidUrls = [
            "http:// foo.com",
            "http:/foo.com",
            "http://.foo.com",
            "http://foo..com",
            "http://.com",
            "http://foo",
            "http://foo.c",
        ];

        for (const url of inValidUrls) {
            expect(!isUrl(url) && url).toEqual(url);
        }
    });
});

Upvotes: 0

Daniel Rodr&#237;guez
Daniel Rodr&#237;guez

Reputation: 685

If you can change the input type, I think this solution would be much easier:

You can simply use type="url" in your input and then check it with checkValidity() in JS.

E.g:

your.html

<input id="foo" type="url">

your.js

// The selector is JQuery, but the function is plain JS
// On every keyup in #foo, ask the element itself (`this`) whether its current
// value passes its built-in validation, and branch on the result.
$("#foo").on("keyup", function() {
    if (this.checkValidity()) {
        // The url is valid
    } else {
        // The url is invalid
    }
});

Upvotes: 1

Ilyich
Ilyich

Reputation: 5776

Use validator.js

ES6

import isURL from 'validator/lib/isURL'

isURL(string)

No ES6

var validator = require('validator');

validator.isURL(string)

You can also fine tune this function's behavior by passing optional options object as the second argument of isURL

Here is the default options object:

let options = {
    protocols: [
        'http',
        'https',
        'ftp'
    ],
    require_tld: true,
    require_protocol: false,
    require_host: true,
    require_valid_protocol: true,
    allow_underscores: false,
    host_whitelist: false,
    host_blacklist: false,
    allow_trailing_dot: false,
    allow_protocol_relative_urls: false,
    disallow_auth: false
}

isURL(string, options)

host_whitelist and host_blacklist can be arrays of hosts. They also support regular expressions.

let options = {
    host_blacklist: ['foo.com', 'bar.com'],
}

isURL('http://foobar.com', options) // => true
isURL('http://foo.bar.com/', options) // => true
isURL('http://qux.com', options) // => true

isURL('http://bar.com/', options) // => false
isURL('http://foo.com/', options) // => false


options = {
    host_blacklist: ['bar.com', 'foo.com', /\.foo\.com$/],
}

isURL('http://foobar.com', options) // => true
isURL('http://foo.bar.com/', options) // => true
isURL('http://qux.com', options) // => true

isURL('http://bar.com/', options) // => false
isURL('http://foo.com/', options) // => false
isURL('http://images.foo.com/', options) // => false
isURL('http://cdn.foo.com/', options) // => false
isURL('http://a.b.c.foo.com/', options) // => false

Upvotes: 24

Vikasdeep Singh
Vikasdeep Singh

Reputation: 21756

I am using below function to validate URL with or without http/https:

/**
 * Reports whether `string` contains something shaped like a URL, with or
 * without an http/https scheme. The pattern is unanchored, so a match
 * anywhere in the text counts.
 *
 * @param {string} string - Text to search.
 * @returns {boolean} `true` when at least one match is found.
 */
function isValidURL(string) {
  const urlLike = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g;
  const matches = string.match(urlLike);
  if (matches === null) {
    return false;
  }
  return true;
};

var testCase1 = "http://en.wikipedia.org/wiki/Procter_&_Gamble";
console.log(isValidURL(testCase1)); // return true

var testCase2 = "http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707";
console.log(isValidURL(testCase2)); // return true

var testCase3 = "https://sdfasd";
console.log(isValidURL(testCase3)); // return false

var testCase4 = "dfdsfdsfdfdsfsdfs";
console.log(isValidURL(testCase4)); // return false

var testCase5 = "magnet:?xt=urn:btih:123";
console.log(isValidURL(testCase5)); // return false

var testCase6 = "https://stackoverflow.com/";
console.log(isValidURL(testCase6)); // return true

var testCase7 = "https://w";
console.log(isValidURL(testCase7)); // return false

var testCase8 = "https://sdfasdp.ppppppppppp";
console.log(isValidURL(testCase8)); // return false

Upvotes: 108

HeshamSalama
HeshamSalama

Reputation: 158

This works for me:

/**
 * Tests whether `str` contains an http:// or https:// URL.
 * The pattern is unanchored, so a match anywhere in the string counts.
 *
 * @param {string} str - Text to check.
 * @returns {boolean} `true` on a match.
 */
function isURL(str) {
  const source = /(http|https):\/\/(\w+:{0,1}\w*)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%!\-\/]))?/;
  return new RegExp(source).test(str);
}

Upvotes: 3

Michael Ecklund
Michael Ecklund

Reputation: 1256

Here's just a very simple check to make sure there's a valid protocol, and the domain extension must be two or more characters.

/**
 * Very simple check: the value must parse as an http/https URL, and the
 * host's domain extension (the text after the last dot of the hostname) must
 * be two or more characters long.
 *
 * The extension is measured on the parsed hostname rather than on the raw
 * string, so dots in the path or query (e.g. `/file.a`) no longer affect the
 * result, and hosts without a dot (e.g. `localhost`) are rejected. The scheme
 * is taken from the parsed URL, so an uppercase scheme or a second URL inside
 * the query string does not cause a false rejection.
 *
 * @param {string|URL} $url - Candidate URL.
 * @returns {boolean} Whether the value satisfies both conditions.
 */
const is_valid_url = ( $url ) => {

    let $url_object = null;

    try {
        $url_object = new URL( $url );
    } catch ( $error ) {
        return false;
    }

    if ( ! [ 'http:', 'https:' ].includes( $url_object.protocol ) ) {
        return false;
    }

    const $hostname = $url_object.hostname;
    const $domain_extension_position = $hostname.lastIndexOf( '.' );

    return (
        // There must be a name before the dot...
        $domain_extension_position > 0 &&
        // ...and at least two characters after it.
        $hostname.length - $domain_extension_position > 2
    );

};

Upvotes: 0

rdans
rdans

Reputation: 2157

In my case my only requirement is that the user input won't be interpreted as a relative link when placed in the href of an a tag and the answers here were either a bit OTT for that or allowed URLs not meeting my requirements, so this is what I'm going with:

^https?://.+$

The same thing could be achieved pretty easily without regex.

Upvotes: 1

Caner
Caner

Reputation: 59158

This is quite difficult to do with pure regex because URLs have many 'inconveniences'.

  1. For example domain names have complicated restrictions on hyphens:

    a. It is allowed to have many consecutive hyphens in the middle.

    b. but the first character and last character of the domain name cannot be a hyphen

    c. The 3rd and 4th character cannot be both hyphen

  2. Similarly port number can only be in the range 1-65535. This is easy to check if you extract the port part and convert to int but quite difficult to check with a regular expression.

  3. There is also no easy way to check valid domain extensions. Some countries have second-level domains(such as 'co.uk'), or the extension can be a long word such as '.international'. And new TLDs are added regularly. This type of things can only be checked against a hard-coded list. (see https://en.wikipedia.org/wiki/Top-level_domain)

  4. Then there are magnet urls, ftp addresses etc. These all have different requirements.

Nevertheless, here is a function that handles pretty much everything except:

  • Case 1. c
  • Accepts any 1-5 digit port number
  • Accepts any extension 2-13 chars
  • Does not accept ftp, magnet, etc...

/**
 * Regex-based URL check (http/https scheme optional).
 *
 * Accepts a dotted domain name whose labels are 1-63 characters and whose
 * extension is 2-13 letters, an IPv4 address, or `localhost`, followed by an
 * optional 1-5 digit port, path, query string and fragment.
 *
 * @param {*} input - Value to test; non-strings are coerced by `RegExp#test`.
 * @returns {boolean} `true` when the whole value matches the pattern.
 */
function isValidURL(input) {
    // Declared locally: assigning to undeclared names leaks globals in sloppy
    // mode and throws a ReferenceError in strict mode / ES modules.
    const pattern = '^(https?:\\/\\/)?' + // protocol
        // The label tail is optional (`?`), not repeated (`*`): repetition
        // defeated the {0,61} length cap and allowed exponential backtracking.
        '((([a-zA-Z\\d]([a-zA-Z\\d-]{0,61}[a-zA-Z\\d])?\\.)+' + // sub-domain + domain name
        '[a-zA-Z]{2,13})' + // extension
        '|((\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address
        '|localhost)' + // OR localhost
        '(\\:\\d{1,5})?' + // port
        // The `+-:` range in the class already includes `/`, so one optional
        // group accepts the same paths as the repeated group did, without the
        // ambiguity that made runs of slashes blow up.
        '(\\/[a-zA-Z\\&\\d%_.~+-:@]*)?' + // path
        '(\\?[a-zA-Z\\&\\d%_.,~+-:@=;&]*)?' + // query string
        '(\\#[-a-zA-Z&\\d_]*)?$'; // fragment locator
    const regex = new RegExp(pattern);
    return regex.test(input);
}

let tests = [];
tests.push(['', false]);
tests.push(['http://en.wikipedia.org/wiki/Procter_&_Gamble', true]);
tests.push(['https://sdfasd', false]);
tests.push(['http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707', true]);
tests.push(['https://stackoverflow.com/', true]);
tests.push(['https://w', false]);
tests.push(['aaa', false]);
tests.push(['aaaa', false]);
tests.push(['oh.my', true]);
tests.push(['dfdsfdsfdfdsfsdfs', false]);
tests.push(['google.co.uk', true]);
tests.push(['test-domain.MUSEUM', true]);
tests.push(['-hyphen-start.gov.tr', false]);
tests.push(['hyphen-end-.com', false]);
tests.push(['https://sdfasdp.international', true]);
tests.push(['https://sdfasdp.pppppppp', false]);
tests.push(['https://sdfasdp.ppppppppppppppppppp', false]);
tests.push(['https://sdfasd', false]);
tests.push(['https://sub1.1234.sub3.sub4.sub5.co.uk/?', true]);
tests.push(['http://www.google-com.123', false]);
tests.push(['http://my--testdomain.com', false]);
tests.push(['http://my2nd--testdomain.com', true]);
tests.push(['http://thingiverse.com/download:1894343', true]);
tests.push(['https://medium.com/@techytimo', true]);
tests.push(['http://localhost', true]);
tests.push(['localhost', true]);
tests.push(['localhost:8080', true]);
tests.push(['localhost:65536', true]);
tests.push(['localhost:80000', false]);
tests.push(['magnet:?xt=urn:btih:123', true]);

for (let i = 0; i < tests.length; i++) {
    console.log('Test #' + i + (isValidURL(tests[i][0]) == tests[i][1] ? ' passed' : ' failed') + ' on ["' + tests[i][0] + '", ' + tests[i][1] + ']');
}

Upvotes: 9

Timothy
Timothy

Reputation: 4285

Improvement on the accepted answer...

  • Check for ftp/ftps as protocol
  • Has double escaping for backslashes (\\)
  • Ensures that domains have a dot and an extension (.com .io .xyz)
  • Allows full colon (:) in the path e.g. http://thingiverse.com/download:1894343
  • Allows ampersand (&) in path e.g http://en.wikipedia.org/wiki/Procter_&_Gamble
  • Allows @ symbol in path e.g. https://medium.com/@techytimo

    isURL(str) {
      var pattern = new RegExp('^((ft|htt)ps?:\\/\\/)?'+ // protocol
      '((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|'+ // domain name and extension
      '((\\d{1,3}\\.){3}\\d{1,3}))'+ // OR ip (v4) address
      '(\\:\\d+)?'+ // port
      '(\\/[-a-z\\d%@_.~+&:]*)*'+ // path
      '(\\?[;&a-z\\d%@_.,~+&:=-]*)?'+ // query string
      '(\\#[-a-z\\d_]*)?$','i'); // fragment locator
      return pattern.test(str);
    }
    

Upvotes: 27

Related Questions