ZedBrannigan
ZedBrannigan

Reputation: 611

My first automated SQL insert: I feel im losing a lot of performance here

I am crawling Web Data, parsing it, and write it to a Database. Here is the "insert" part:

public void insert(String comp, String title, String date, String location, String keyword){
     String query = "INSERT INTO "+ dbtablename +" "
            + "(company_name,job_title,date_created,location, platform, keyword) VALUES "
            + "(\""+comp+"\",\""+title+"\",\""+date+"\",\""+location+"\",\""+ platform +"\",\""+keyword  + "\");";
     OpenConnectionDB();
     try {
        this.statement = this.connection.createStatement();
            this.statement.execute(query);
            statement.close();
    } catch (SQLException ex) {
        Logger.getLogger(Database.class.getName()).log(Level.SEVERE, null, ex);
    }
     finally {closeConnectionDB();}
    closeConnectionDB();
}

The connection is created like this:

public void getData(Database c) throws IOException 
        {


//          try {

           CSVReader reader = new CSVReader(new FileReader(csvFilename), ';');
           String[] row = null;

           while((row = reader.readNext()) != null) {
               for (int i=0; i<  row.length; i=i+2 )
               {
                   System.out.println(row[i].trim());
                   System.out.println( "C:/Talend/workspace/WEBCRAWLER/output/keywords_"+row[i].trim()+".txt");

                   Document document = Jsoup.parse(new File("C:/Talend/workspace/WEBCRAWLER/output/keywords_"+row[i].trim()+".txt"), "utf-8");
                    Elements elements = document.select(".joblisting");
                    for (Element element : elements) 
                    {           
                        // Parse Data into Elements
                        Elements jobTitleElement = element.select(".job_title span");
                        Elements companyNameElement = element.select(".company_name span[itemprop=name]");
                        Elements locationElement = element.select(".locality span[itemprop=addressLocality]");
                        Elements dateElement = element.select(".job_date_added [datetime]");

                        // Strip Data from unnecessary tags
                        String companyName = companyNameElement.text();
                        String jobTitle = jobTitleElement.text();
                        String location = locationElement.text();
                        String timeAdded = dateElement.attr("datetime");

                        String cleanJobTitle = jobTitle.replaceAll("\"", "");

                        c.insert(companyName, cleanJobTitle, timeAdded, location, row[i].trim());
                        c.closeConnectionDB();

                            } 


                       }
                       // return row[rowCount];
                        }
                   //...
                   reader.close();


        }

It works just fine, but at some point it provokes this error:

Jul 29, 2014 1:26:03 PM org.jsoup.examples.Database OpenConnectionDB
SEVERE: null
com.mysql.jdbc.exceptions.jdbc4.CommunicationsException: Communications link failure

The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
    at java.lang.reflect.Constructor.newInstance(Unknown Source)
    at com.mysql.jdbc.Util.handleNewInstance(Util.java:409)
    at com.mysql.jdbc.SQLError.createCommunicationsException(SQLError.java:1127)
    at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:356)
    at com.mysql.jdbc.ConnectionImpl.coreConnect(ConnectionImpl.java:2502)
    at com.mysql.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:2539)
    at com.mysql.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:2321)
    at com.mysql.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:832)
    at com.mysql.jdbc.JDBC4Connection.<init>(JDBC4Connection.java:46)
    at sun.reflect.GeneratedConstructorAccessor5.newInstance(Unknown Source)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
    at java.lang.reflect.Constructor.newInstance(Unknown Source)
    at com.mysql.jdbc.Util.handleNewInstance(Util.java:409)
    at com.mysql.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:417)
    at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:344)
    at java.sql.DriverManager.getConnection(Unknown Source)
    at java.sql.DriverManager.getConnection(Unknown Source)
    at org.jsoup.examples.Database.OpenConnectionDB(Database.java:35)
    at org.jsoup.examples.Database.insert(Database.java:70)
    at org.jsoup.examples.parseEasy.getData(parseEasy.java:65)
    at startWorkflow.main(startWorkflow.java:27)
Caused by: java.net.SocketException: Permission denied: connect
    at java.net.DualStackPlainSocketImpl.connect0(Native Method)
    at java.net.DualStackPlainSocketImpl.socketConnect(Unknown Source)
    at java.net.AbstractPlainSocketImpl.doConnect(Unknown Source)
    at java.net.AbstractPlainSocketImpl.connectToAddress(Unknown Source)
    at java.net.AbstractPlainSocketImpl.connect(Unknown Source)
    at java.net.PlainSocketImpl.connect(Unknown Source)
    at java.net.SocksSocketImpl.connect(Unknown Source)
    at java.net.Socket.connect(Unknown Source)
    at java.net.Socket.connect(Unknown Source)
    at java.net.Socket.<init>(Unknown Source)
    at java.net.Socket.<init>(Unknown Source)
    at com.mysql.jdbc.StandardSocketFactory.connect(StandardSocketFactory.java:258)
    at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:306)
    ... 17 more

And it feels like im doing an unnecessary open() or close() of the DB. Do you see any of these in my massive Wall of Code? (sorry). And can you help me resolve that error?

Thank you so much community so far!

Upvotes: 0

Views: 120

Answers (1)

Baldy
Baldy

Reputation: 2002

Here's a quick re-factoring that reuses the connection and uses prepared statements. I'm not sure what database driver you're using so the open/close and prepared statement stuff should get you into the ballpark but is likely syntactically incorrect.

private static final String KEYWORD_INSERT = "INSERT INTO " + dbtablename
        + "(company_name, job_title, date_created, location, platform, keyword)"
        + "VALUES(?, ?, ?, ?, ?, ?)";

public void getData(Database c) throws IOException {
    try {
        Connection connection = c.getDbConnection();
        PreparedStatement stmt = connection.prepareStatement(KEYWORD_INSERT);

        CSVReader reader = new CSVReader(new FileReader(csvFilename), ';');
        String[] row;

        while((row = reader.readNext()) != null) {
            for (int i=0; i<  row.length; i=i+2 ) {

                Document document = Jsoup.parse(new File("C:/Talend/workspace/WEBCRAWLER/output/keywords_"+row[i].trim()+".txt"), "utf-8");
                Elements elements = document.select(".joblisting");
                for (Element element : elements) {           
                    // Parse Data into Elements
                    Elements jobTitleElement = element.select(".job_title span");
                    Elements companyNameElement = element.select(".company_name span[itemprop=name]");
                    Elements locationElement = element.select(".locality span[itemprop=addressLocality]");
                    Elements dateElement = element.select(".job_date_added [datetime]");

                    // Strip Data from unnecessary tags
                    String companyName = companyNameElement.text();
                    String jobTitle = jobTitleElement.text().replaceAll("\"", "");
                    String location = locationElement.text();
                    String timeAdded = dateElement.attr("datetime");

                    stmt.setString(1, companyName);
                    stmt.setString(2, jobTitle);
                    stmt.setString(3, timeAdded);
                    stmt.setString(4, location);
                    stmt.setString(5, platform);
                    stmt.setString(6, row[i].trim());

                    stmt.executeUpdate();
                }
            }
        }
    } finally {
        stmt.close();
        connection.close();
    }
}

Upvotes: 1

Related Questions