Jason Kim
Jason Kim

Reputation: 19031

Return HTML content as a string, given URL. Javascript Function

I want to write a javascript function that returns HTML content as string given URL to the function. I found a similar answer on Stackoverflow.

I am trying to use this answer to solve my problem.

However, it seems as though document.write() isn't writing anything. When I load the page, I get a blank screen.

<html>
<head>
</head>
<body>  
  <script type="text/JavaScript">
  /**
   * Fetch a URL with a blocking GET request and hand back the body text.
   *
   * @param {string} theUrl - Address to request.
   * @returns {string} The `responseText` of the completed request.
   */
  function httpGet(theUrl)
  {
    const request = new XMLHttpRequest();

    // The third argument (`false`) asks for a synchronous request, so the
    // response is read only after send() has returned.
    request.open( "GET", theUrl, false );
    request.send( null );

    const body = request.responseText;
    return body;
  }
  document.write(httpGet("https://stackoverflow.com/"));
  </script>
</body>
</html>

Upvotes: 56

Views: 303615

Answers (6)

T.J. Crowder
T.J. Crowder

Reputation: 1074355

You can't do this in the general case

...because of the Same Origin Policy that browsers use to restrict access to Site B from code running on Site A. When you try (with XMLHttpRequest or fetch etc.), you'll get an error saying something along the lines of:

No 'Access-Control-Allow-Origin' header is present on the requested resource

More in this question's answers and the SOP link above. But basically: It's not about preventing people seeing Site B's content, it's about not being able to do so from a context (a browser) that may have stored authentication information for the current user that would reveal their private information if you could read Site B's content from that user's browser.

A very small number of websites may serve their content using Cross Origin Resource Sharing headers (the aforementioned Access-Control-Allow-Origin and others) to allow any site to read their content, but that's very unusual.

If you want to retrieve the content of the vast majority of sites, you'll have to use code running in a non-browser context to do it (for instance, code running on a server somewhere). Over the years there have been many sites that allowed you to query them with a URL and return you the content of that URL queried by their server (and so not subject to the SOP, because the current user's browser-based authentication information for the other site isn't used), but they tend to spring up and then go away again as there's not really a good revenue model to support the bandwidth requirements. After all, why pay someone else for that when you can just run your own server for cheap (or even free) and do it yourself.

Upvotes: 2

Jongwoo Lee
Jongwoo Lee

Reputation: 1128

On some websites, XMLHttpRequest may return something like <script src="#"></script>. In that case, try using an HTML document like the one below:

<html>
  <body>
    <iframe src="website.com"></iframe>
    <script src="your_JS"></script>
  </body>
</html>

Now you can use JS to get some text out of the HTML, for example with getElementById.

But this may not work for some websites with cross-domain blocking.

Upvotes: 0

RedDragonWebDesign
RedDragonWebDesign

Reputation: 2571

Here's a version of the accepted answer that 1) returns a value from the function (bugfix), and 2) doesn't break when using "use strict";

I use this code to pre-load a .txt file into my <textarea> when the user loads the page.

/**
 * Synchronously GET a URL and return the response body as text.
 *
 * The request is opened with `async = false`, so send() blocks until the
 * response has arrived and the body can be read immediately afterwards.
 * The body is returned whatever the HTTP status is.
 *
 * @param {string} theUrl - Address to request.
 * @returns {string} The response body text.
 */
function httpGet(theUrl)
{
    let xmlhttp;

    if (window.XMLHttpRequest) { // code for IE7+, Firefox, Chrome, Opera, Safari
        xmlhttp = new XMLHttpRequest();
    } else { // code for IE6, IE5
        xmlhttp = new ActiveXObject("Microsoft.XMLHTTP");
    }

    // No onreadystatechange handler is installed: a value returned from that
    // callback is discarded, and with a synchronous request the result is
    // already available once send() returns.
    xmlhttp.open("GET", theUrl, false);
    xmlhttp.send();

    // Read `responseText` rather than `response`: for a text request both hold
    // the same string on XMLHttpRequest, but only `responseText` is expected to
    // exist on the legacy ActiveX object created in the fallback branch above.
    return xmlhttp.responseText;
}

Upvotes: 1

otaxige_aol
otaxige_aol

Reputation: 341

The only approach I have found for cross-site requests is this function:

<script type="text/javascript">
var your_url = 'http://www.example.com';

</script>

<script type="text/javascript" src="jquery.min.js" ></script>
<script type="text/javascript">
// jquery.xdomainajax.js  ------ from padolsey

/**
 * Replace jQuery.ajax with a wrapper that reroutes cross-domain GET requests
 * through Yahoo's YQL endpoint as a JSONP call, then hands the caller's
 * success callback a fake XHR-like object whose `responseText` is the fetched
 * markup with <script> elements stripped.
 *
 * Requests that are same-origin, non-GET, or that ask for a JSON dataType are
 * passed to the original jQuery.ajax untouched.
 *
 * NOTE(review): this relies on the public YQL service at query.yahooapis.com
 * still being reachable — confirm before depending on it.
 */
jQuery.ajax = (function(_ajax){

    var protocol = location.protocol,
        hostname = location.hostname,
        // Anchor at the start and escape the origin so that dots in the
        // hostname match literally and the page's origin appearing later in a
        // URL (e.g. inside a query string) does not make it look internal.
        exRegex = RegExp('^' + escapeRegExp(protocol + '//' + hostname)),
        YQL = 'http' + (/^https/.test(protocol)?'s':'') + '://query.yahooapis.com/v1/public/yql?callback=?',
        query = 'select * from html where url="{URL}" and xpath="*"';

    // Escape characters that have special meaning inside a regular expression.
    function escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    // A URL is external when it is absolute and does not start with this page's origin.
    function isExternal(url) {
        return !exRegex.test(url) && /:\/\//.test(url);
    }

    return function(o) {

        var url = o.url;

        if ( /get/i.test(o.type) && !/json/i.test(o.dataType) && isExternal(url) ) {

            // Manipulate options so that JSONP-x request is made to YQL

            o.url = YQL;
            o.dataType = 'json';

            // Fold any caller-supplied data into the target URL's query string
            // before embedding that URL in the YQL statement.
            o.data = {
                q: query.replace(
                    '{URL}',
                    url + (o.data ?
                        (/\?/.test(url) ? '&' : '?') + jQuery.param(o.data)
                    : '')
                ),
                format: 'xml'
            };

            // Since it's a JSONP request
            // complete === success
            if (!o.success && o.complete) {
                o.success = o.complete;
                delete o.complete;
            }

            o.success = (function(_success){
                return function(data) {

                    if (_success) {
                        // An empty or missing result set yields an empty string
                        // instead of throwing on `undefined.replace`.
                        var markup = (data && data.results && data.results[0]) || '';

                        // Fake XHR callback.
                        _success.call(this, {
                            responseText: String(markup)
                                // YQL screws with <script>s
                                // Get rid of them
                                // ([\s\S] matches any character, like (.|\s), without
                                // the alternation that backtracks badly on long input.)
                                .replace(/<script[^>]+?\/>|<script[\s\S]*?\/script>/gi, '')
                        }, 'success');
                    }

                };
            })(o.success);

        }

        return _ajax.apply(this, arguments);

    };

})(jQuery.ajax);



// Example usage: request `your_url` and show the returned markup.
$.ajax({
    type: 'GET',
    url: your_url,
    success: function(res) {
        // then you can manipulate your text as you wish
        alert(res.responseText);
    }
});

</script>

Upvotes: 17

Pranay Rana
Pranay Rana

Reputation: 176896

After you get the response, just call this function to append the data to your body element:

/**
 * Wrap a markup string in a new <div> and append that <div> to the first
 * <body> element of the document.
 *
 * @param {string} responsetext - Markup to place inside the new <div>.
 */
function createDiv(responsetext)
{
    // NOTE: innerHTML parses the string as HTML, so only pass content you trust.
    var wrapper = document.createElement('div');
    wrapper.innerHTML = responsetext;

    document.getElementsByTagName('body')[0].appendChild(wrapper);
}

@Satya's code, modified as below:

/**
 * Synchronously GET a URL and, when the server answers 200, pass the response
 * text to createDiv() so it is appended to the page.
 *
 * @param {string} theUrl - Address to request.
 * @returns {undefined} Nothing; the result is delivered through createDiv().
 */
function httpGet(theUrl)
{
    // Declared locally: assigning to an undeclared name would create an
    // implicit global (and throws a ReferenceError in strict mode).
    let xmlhttp;

    if (window.XMLHttpRequest)
    {// code for IE7+, Firefox, Chrome, Opera, Safari
        xmlhttp = new XMLHttpRequest();
    }
    else
    {// code for IE6, IE5
        xmlhttp = new ActiveXObject("Microsoft.XMLHTTP");
    }

    // `false` makes the request synchronous, so the response is complete by
    // the time send() returns and can be inspected directly, with no
    // readystatechange callback needed.
    xmlhttp.open("GET", theUrl, false);
    xmlhttp.send();

    if (xmlhttp.readyState === 4 && xmlhttp.status === 200)
    {
        createDiv(xmlhttp.responseText);
    }
}

Upvotes: 8

Satya
Satya

Reputation: 8881

You need to return the response once readyState == 4, e.g.:

/**
 * Synchronously GET a URL and return its body when the server answers 200.
 *
 * @param {string} theUrl - Address to request.
 * @returns {string|undefined} The response text on HTTP 200, otherwise
 *     `undefined`.
 */
function httpGet(theUrl)
{
    // Declared locally: assigning to an undeclared name would create an
    // implicit global (and throws a ReferenceError in strict mode).
    let xmlhttp;

    if (window.XMLHttpRequest)
    {// code for IE7+, Firefox, Chrome, Opera, Safari
        xmlhttp = new XMLHttpRequest();
    }
    else
    {// code for IE6, IE5
        xmlhttp = new ActiveXObject("Microsoft.XMLHTTP");
    }

    xmlhttp.open("GET", theUrl, false );
    xmlhttp.send();

    // The request is synchronous, so the result is available here. Returning
    // from inside an onreadystatechange callback would only return from that
    // callback, never from httpGet itself.
    if (xmlhttp.readyState === 4 && xmlhttp.status === 200)
    {
        return xmlhttp.responseText;
    }
    return undefined;
}

Upvotes: 47

Related Questions