Jonathan Laliberte
Jonathan Laliberte

Reputation: 179

Using regex to extract links only in loop

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Sends a query to the Google Custom Search JSON endpoint and prints every
 * response line that contains the text {@code jpg}.
 *
 * <p>The matching lines are printed unmodified, so each one still carries its
 * JSON key, quotes and trailing comma. The link regex is compiled but not yet
 * applied to the response.
 */
public class google {

    /**
     * Performs the request, collects the response lines containing
     * {@code jpg} and prints them.
     *
     * @param args unused
     * @throws Exception if the connection cannot be opened or the response
     *                   cannot be read
     */
    public static void main(String[] args) throws Exception

    {
        StringBuilder a = new StringBuilder();
        // Candidate link regex; compiled here but not applied to the lines below yet.
        String regex = "(https?):\\/\\/(www\\.)?[a-z0-9\\.:].*?(?=\\s)";
        Pattern r = Pattern.compile(regex);
        String key="myapikey";
        String qry="tree";

        URL url = new URL("https://www.googleapis.com/customsearch/v1?key="+key+ "&cx=**************&q="+ qry +"&alt=json");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("Accept", "application/json");

        // try-with-resources closes the reader even when reading fails; the
        // charset is explicit so decoding does not depend on the platform default.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {

            String output;
            System.out.println(url);
            System.out.println("Output from Server .... \n");

            while ((output = br.readLine()) != null) {

                if (output.contains("jpg")) {
                    // The whole line is kept, not just the link inside it.
                    a.append(output).append('\n');
                }
            }
        } finally {
            // Always release the connection, including on a read error.
            conn.disconnect();
        }

        System.out.println(a);       //Will print the google search links
    }

}

That program returns the following:

"url": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg"

"og:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg",

"twitter:image": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg",

"thumbnailurl": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault4.jpg",

"src": "https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault5.jpg"

"url": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault6.jpg",

"og:image": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault7.jpg",

"thumbnailurl": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault8.jpg",

"src": "https://i.ytimg.com/vi/Iv9E9xLFUso/maxresdefault9.jpg"

But I need it to return only this:

https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault1.jpg https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault2.jpg https://i.ytimg.com/vi/XGM6sHIJuho/hqdefault3.jpg

etc...

The regex that matches only the link is this:

String regex = "(https?):\\/\\/(www\\.)?[a-z0-9\\.:].*?(?=\\s)";

But I am having trouble implementing it in this program. Any ideas?

Thanks for your time

Upvotes: 1

Views: 127

Answers (1)

Jonathan Laliberte
Jonathan Laliberte

Reputation: 179

I found a solution in the end, using a different expression. Thanks for the suggestions!

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Pattern;
import java.util.regex.Matcher;


/**
 * Sends a query to the Google Custom Search JSON endpoint and prints the
 * image links found in the response, one link per line.
 */
public class google {

    /**
     * Matches an http(s) URL that ends in jpg, jpeg, png or gif. Compiled once
     * here instead of once per response line. Note that the expression does
     * not require a dot before the extension.
     */
    private static final Pattern IMAGE_LINK = Pattern.compile(
            "(?:(?:https?)+\\:\\/\\/+[a-zA-Z0-9\\/\\._-]{1,})+(?:(?:jpe?g|png|gif))");

    /**
     * Performs the request, scans the response line by line and prints every
     * image link that was found.
     *
     * @param args unused
     * @throws Exception if the connection cannot be opened or the response
     *                   cannot be read
     */
    public static void main(String[] args) throws Exception

    {
        StringBuilder results = new StringBuilder();

        String key="myprivatekey";
        String qry="tree";

        URL url = new URL(
                "https://www.googleapis.com/customsearch/v1?key="+key+ "&cx=myprivatekey&q="+ qry +"&alt=json");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("Accept", "application/json");

        // try-with-resources closes the reader even when reading fails; the
        // charset is explicit so decoding does not depend on the platform default.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                conn.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {

            String output;
            System.out.println(url);
            System.out.println("Output from Server .... \n");

            while ((output = br.readLine()) != null) {
                Matcher matcher = IMAGE_LINK.matcher(output);

                // Keep searching the same line so that a line holding several
                // links contributes all of them, not only the first.
                while (matcher.find()) {
                    results.append(matcher.group()).append('\n');
                }
            }
        } finally {
            // Always release the connection, including on a read error.
            conn.disconnect();
        }

        System.out.println(results);
    }

}

Upvotes: 1

Related Questions