Java website parser

Question

I'm trying to parse the following line from a site:

£2,995

I only want the 2995 part of it, but I'm having difficulty extracting it. Here is my code; it can currently find every line containing the £ sign and display all the currency amounts on the website. Please help!

/**
 * Downloads one fixed search-results page and collects every price on it that
 * is written as a pound sign followed by digits. Thousands separators are
 * dropped, so a pound sign followed by "2,995" is stored as the integer 2995.
 */
public class parser {

    /** Currency marker that introduces a price: the pound sign (U+00A3). */
    private static final String POUND_SIGN = "\u00A3";

    /**
     * A pound sign immediately followed by a run of digits and commas.
     * Group 1 holds the numeric part, for example "2,995".
     */
    private static final java.util.regex.Pattern PRICE_PATTERN =
            java.util.regex.Pattern.compile(POUND_SIGN + "(\\d[\\d,]*)");

    private final String testURL = "http://www.autotrader.co.uk/search/used/cars/bmw/1_series/postcode/tn126bg/radius/1500/onesearchad/used%2Cnearlynew%2Cnew/quicksearch/true/page/2";

    /** Every downloaded line that contains a pound sign, in page order. */
    private final ArrayList<String> list = new ArrayList<String>();

    /** Every price parsed from those lines, in page order. */
    private final ArrayList<Integer> prices = new ArrayList<Integer>();

    // Kept from the original declaration; nothing in this class assigns or reads it.
    private int averagePrice;

    /**
     * Fetches the page at {@code testURL} and fills {@code list} and
     * {@code prices} from its contents.
     *
     * @throws IOException if the URL cannot be opened or reading the page fails
     */
    public parser() throws IOException {

        URL url = new URL(testURL);

        // Decode with a fixed charset so matching the pound sign does not depend
        // on the platform default.
        // NOTE(review): assumes the page is served as UTF-8 - confirm.
        Scanner scan = new Scanner(url.openStream(), "UTF-8");
        try {
            while (scan.hasNextLine()) {
                String line = scan.nextLine();

                if (line.contains(POUND_SIGN)) {
                    list.add(line);
                    prices.addAll(extractPrices(line));
                }
            }

            // Scanner hides read errors from its source; report them instead of
            // silently returning a truncated result.
            IOException readFailure = scan.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        } finally {
            // Closing the scanner also closes the underlying URL stream.
            scan.close();
        }
    }

    /**
     * Extracts every price from a single line of text.
     *
     * <p>A price is a pound sign directly followed by at least one digit.
     * Commas inside the number are ignored, and parsing stops at the first
     * character that is neither a digit nor a comma, so a pound sign followed
     * by "2,995.00" yields 2995.
     *
     * @param line the text to scan; must not be {@code null}
     * @return the prices found, in order of appearance; empty if there are none
     */
    static ArrayList<Integer> extractPrices(String line) {
        ArrayList<Integer> found = new ArrayList<Integer>();
        java.util.regex.Matcher matcher = PRICE_PATTERN.matcher(line);

        while (matcher.find()) {
            String digits = matcher.group(1).replace(",", "");
            try {
                found.add(Integer.parseInt(digits));
            } catch (NumberFormatException ignored) {
                // The digit run does not fit in an int; skip it rather than
                // abort the whole page.
            }
        }
        return found;
    }

    /**
     * Runs the parser against the built-in URL and prints each price on its
     * own line.
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched or read
     */
    public static void main(String args[]) throws IOException {

        parser p = new parser();

        for (Integer x : p.prices) {
            System.out.println(x);
        }
    }
}

Java website parser

Answers (1)

Related Questions