Reputation: 611
I am crawling Web Data, parsing it, and write it to a Database. Here is the "insert" part:
public void insert(String comp, String title, String date, String location, String keyword){
String query = "INSERT INTO "+ dbtablename +" "
+ "(company_name,job_title,date_created,location, platform, keyword) VALUES "
+ "(\""+comp+"\",\""+title+"\",\""+date+"\",\""+location+"\",\""+ platform +"\",\""+keyword + "\");";
OpenConnectionDB();
try {
this.statement = this.connection.createStatement();
this.statement.execute(query);
statement.close();
} catch (SQLException ex) {
Logger.getLogger(Database.class.getName()).log(Level.SEVERE, null, ex);
}
finally {closeConnectionDB();}
closeConnectionDB();
}
The connection is created like this:
public void getData(Database c) throws IOException
{
// try {
CSVReader reader = new CSVReader(new FileReader(csvFilename), ';');
String[] row = null;
while((row = reader.readNext()) != null) {
for (int i=0; i< row.length; i=i+2 )
{
System.out.println(row[i].trim());
System.out.println( "C:/Talend/workspace/WEBCRAWLER/output/keywords_"+row[i].trim()+".txt");
Document document = Jsoup.parse(new File("C:/Talend/workspace/WEBCRAWLER/output/keywords_"+row[i].trim()+".txt"), "utf-8");
Elements elements = document.select(".joblisting");
for (Element element : elements)
{
// Parse Data into Elements
Elements jobTitleElement = element.select(".job_title span");
Elements companyNameElement = element.select(".company_name span[itemprop=name]");
Elements locationElement = element.select(".locality span[itemprop=addressLocality]");
Elements dateElement = element.select(".job_date_added [datetime]");
// Strip Data from unnecessary tags
String companyName = companyNameElement.text();
String jobTitle = jobTitleElement.text();
String location = locationElement.text();
String timeAdded = dateElement.attr("datetime");
String cleanJobTitle = jobTitle.replaceAll("\"", "");
c.insert(companyName, cleanJobTitle, timeAdded, location, row[i].trim());
c.closeConnectionDB();
}
}
// return row[rowCount];
}
//...
reader.close();
}
It works just fine, but at some point it provokes this error:
Jul 29, 2014 1:26:03 PM org.jsoup.examples.Database OpenConnectionDB
SEVERE: null
com.mysql.jdbc.exceptions.jdbc4.CommunicationsException: Communications link failure
The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
at java.lang.reflect.Constructor.newInstance(Unknown Source)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:409)
at com.mysql.jdbc.SQLError.createCommunicationsException(SQLError.java:1127)
at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:356)
at com.mysql.jdbc.ConnectionImpl.coreConnect(ConnectionImpl.java:2502)
at com.mysql.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:2539)
at com.mysql.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:2321)
at com.mysql.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:832)
at com.mysql.jdbc.JDBC4Connection.<init>(JDBC4Connection.java:46)
at sun.reflect.GeneratedConstructorAccessor5.newInstance(Unknown Source)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
at java.lang.reflect.Constructor.newInstance(Unknown Source)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:409)
at com.mysql.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:417)
at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:344)
at java.sql.DriverManager.getConnection(Unknown Source)
at java.sql.DriverManager.getConnection(Unknown Source)
at org.jsoup.examples.Database.OpenConnectionDB(Database.java:35)
at org.jsoup.examples.Database.insert(Database.java:70)
at org.jsoup.examples.parseEasy.getData(parseEasy.java:65)
at startWorkflow.main(startWorkflow.java:27)
Caused by: java.net.SocketException: Permission denied: connect
at java.net.DualStackPlainSocketImpl.connect0(Native Method)
at java.net.DualStackPlainSocketImpl.socketConnect(Unknown Source)
at java.net.AbstractPlainSocketImpl.doConnect(Unknown Source)
at java.net.AbstractPlainSocketImpl.connectToAddress(Unknown Source)
at java.net.AbstractPlainSocketImpl.connect(Unknown Source)
at java.net.PlainSocketImpl.connect(Unknown Source)
at java.net.SocksSocketImpl.connect(Unknown Source)
at java.net.Socket.connect(Unknown Source)
at java.net.Socket.connect(Unknown Source)
at java.net.Socket.<init>(Unknown Source)
at java.net.Socket.<init>(Unknown Source)
at com.mysql.jdbc.StandardSocketFactory.connect(StandardSocketFactory.java:258)
at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:306)
... 17 more
And it feels like im doing an unnecessary open() or close() of the DB. Do you see any of these in my massive Wall of Code? (sorry). And can you help me resolve that error?
Thank you so much community so far!
Upvotes: 0
Views: 120
Reputation: 2002
Here's a quick re-factoring that reuses the connection and uses prepared statements. I'm not sure what database driver you're using so the open/close and prepared statement stuff should get you into the ballpark but is likely syntactically incorrect.
private static final String KEYWORD_INSERT = "INSERT INTO " + dbtablename
+ "(company_name, job_title, date_created, location, platform, keyword)"
+ "VALUES(?, ?, ?, ?, ?, ?)";
public void getData(Database c) throws IOException {
try {
Connection connection = c.getDbConnection();
PreparedStatement stmt = connection.prepareStatement(KEYWORD_INSERT);
CSVReader reader = new CSVReader(new FileReader(csvFilename), ';');
String[] row;
while((row = reader.readNext()) != null) {
for (int i=0; i< row.length; i=i+2 ) {
Document document = Jsoup.parse(new File("C:/Talend/workspace/WEBCRAWLER/output/keywords_"+row[i].trim()+".txt"), "utf-8");
Elements elements = document.select(".joblisting");
for (Element element : elements) {
// Parse Data into Elements
Elements jobTitleElement = element.select(".job_title span");
Elements companyNameElement = element.select(".company_name span[itemprop=name]");
Elements locationElement = element.select(".locality span[itemprop=addressLocality]");
Elements dateElement = element.select(".job_date_added [datetime]");
// Strip Data from unnecessary tags
String companyName = companyNameElement.text();
String jobTitle = jobTitleElement.text().replaceAll("\"", "");
String location = locationElement.text();
String timeAdded = dateElement.attr("datetime");
stmt.setString(1, companyName);
stmt.setString(2, jobTitle);
stmt.setString(3, timeAdded);
stmt.setString(4, location);
stmt.setString(5, platform);
stmt.setString(6, row[i].trim());
stmt.executeUpdate();
}
}
}
} finally {
stmt.close();
connection.close();
}
}
Upvotes: 1