Reputation: 434
I am reading a pipe delimited file, and loading to a table in a MySQL database. I eventually need to have this script run off an application server in Oregon, and load to the table in North Carolina. So yes, I realize there is going to be a sizeable network latency involved. Also, to address this to some extent, I made sure I was performing array inserts, and not row-by-row.
However, I am confused as to why I am seeing a dramatic difference in performance when I do this through Java, and MySQL JDBC versus, using Perl/DBI. When I run my inserts in batch sizes of 1000 rows at a time, Perl finishes in 8 seconds, while the java application takes 137 seconds (2 mins and 17 secs)
I didn't expect identical performance, but not a dramatic difference as this. Thoughts?
I am only a Java beginner, so its quite possible, I am leaving out something obvious! hence wanted to reach out.
Here are the relevant environments: Java: 1.6.0_31 MySQL JDBC: 5.1.31 perl: 5.10.1
Also, what else can I do to make inserts over WAN faster? Does MySQL support compressed data ingestion? I have some massive compute capacity on my application cluster.
public class testMySql_Load {
public static void main(String[] args) throws IOException, ClassNotFoundException, SQLException, ParseException{
int i = 0;
FileInputStream inputStream = new FileInputStream("pipe_delimited_text_file");
Scanner sc = null;
Connection connect = null;
try {
// this will load the MySQL driver, each DB has its own driver
Class.forName("com.mysql.jdbc.Driver");
String connectionString = "jdbc:mysql://host:port/database?user=user&password=password";
System.out.println(connectionString);
// setup the connection with the DB.
connect = DriverManager.getConnection(connectionString);
connect.setAutoCommit(false);
System.out.println("Connected");
PreparedStatement truncateStatement = connect.prepareStatement("Truncate table target_table");
truncateStatement.executeUpdate();
// preparedStatements can use variables and are more efficient
PreparedStatement preparedStatement = connect.prepareStatement(
"INSERT INTO target_table( target_table.ds, ...... ) VALUES ( ?, .... )");
sc = new Scanner(inputStream, "UTF-8");
while (sc.hasNextLine()) {
i++;
if(i>1000){
break;
}
String[] line = sc.nextLine().split("\\|");
//System.out.println(line);
if(line[0].length() == 0) { preparedStatement.setNull(1, java.sql.Types.DATE); } else {
preparedStatement.setDate(1, new java.sql.Date(new SimpleDateFormat("M/d/y", Locale.ENGLISH).parse(line[0].substring(0,Math.max(0, Math.min(line[0].length()-1, 19)))).getTime()));
}
preparedStatement.setInt(2, Integer.valueOf(line[1]));
preparedStatement.setString(3, line[2]);
preparedStatement.setString(4, line[3]);
preparedStatement.setString(5, line[4]);
preparedStatement.setDouble(6, Double.valueOf(line[5]));
preparedStatement.setDouble(7, Double.valueOf(line[6]));
preparedStatement.setString(8, line[7]);
preparedStatement.setString(9, line[8]);
if(line[9].length() == 0) { preparedStatement.setNull(10, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(10, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[9].substring(0,Math.max(0, Math.min(line[9].length()-1, 19)))).getTime()));
}
preparedStatement.setString(11, line[10]);
preparedStatement.setString(12, line[11]);
preparedStatement.setString(13, line[12]);
preparedStatement.setString(14, line[13]);
preparedStatement.setString(15, line[14]);
preparedStatement.setString(16, line[15]);
preparedStatement.setString(17, line[16]);
preparedStatement.setString(18, line[17]);
if(line[18].length() == 0) { preparedStatement.setNull(19, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(19, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[18].substring(0,Math.max(0, Math.min(line[18].length()-1, 19)))).getTime()));
}
preparedStatement.setString(20, line[19]);
preparedStatement.setString(21, line[20]);
preparedStatement.setString(22, line[21]);
preparedStatement.setString(23, line[22]);
preparedStatement.setString(24, line[23]);
if(line[24].length() == 0) { preparedStatement.setNull(25, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(25, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[24].substring(0,Math.max(0, Math.min(line[24].length()-1, 19)))).getTime()));
}
if(line[25].length() == 0) { preparedStatement.setNull(26, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(26, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[25].substring(0,Math.max(0, Math.min(line[25].length()-1, 19)))).getTime()));
}
preparedStatement.setString(27, line[26]);
preparedStatement.setString(28, line[27]);
preparedStatement.setString(29, line[28]);
preparedStatement.setString(30, line[29]);
preparedStatement.setString(31, line[30]);
preparedStatement.setString(32, line[31]);
if(line[32].length() == 0) { preparedStatement.setNull(33, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(33, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[32].substring(0,Math.max(0, Math.min(line[32].length()-1, 19)))).getTime()));
}
preparedStatement.setString(34, line[33]);
preparedStatement.setString(35, line[34]);
preparedStatement.setString(36, line[35]);
if(line[36].length() == 0) { preparedStatement.setNull(37, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(37, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[36].substring(0,Math.max(0, Math.min(line[36].length()-1, 19)))).getTime()));
}
preparedStatement.setString(38, line[37]);
preparedStatement.setString(39, line[38]);
if(line[39].length() == 0) { preparedStatement.setNull(40, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(40, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[39].substring(0,Math.max(0, Math.min(line[39].length()-1, 19)))).getTime()));
}
if(line[40].length() == 0) { preparedStatement.setNull(41, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(41, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[40].substring(0,Math.max(0, Math.min(line[40].length()-1, 19)))).getTime()));
}
preparedStatement.setString(42, line[41]);
preparedStatement.setString(43, line[42]);
preparedStatement.setString(44, line[43]);
if(line[44].length() == 0) { preparedStatement.setNull(45, java.sql.Types.DATE); } else {
preparedStatement.setDate(45, new java.sql.Date(new SimpleDateFormat("M/d/y", Locale.ENGLISH).parse(line[44].substring(0,Math.max(0, Math.min(line[44].length()-1, 19)))).getTime()));
}
preparedStatement.setDouble(46, Double.valueOf(line[45]));
preparedStatement.setString(47, line[46]);
preparedStatement.setString(48, line[47]);
preparedStatement.setString(49, line[48]);
preparedStatement.setString(50, line[49]);
preparedStatement.setString(51, line[50]);
preparedStatement.setString(52, line[51]);
preparedStatement.setString(53, line[52]);
preparedStatement.setString(54, line[53]);
preparedStatement.setString(55, line[54]);
preparedStatement.setString(56, line[55]);
preparedStatement.setString(57, line[56]);
preparedStatement.setInt(58, Integer.valueOf(line[57]));
if(line.length == 59){ if( line[58].length() == 0) { preparedStatement.setNull(59, java.sql.Types.DATE); } else {
preparedStatement.setTimestamp(59, new java.sql.Timestamp(new SimpleDateFormat("M/d/y H:m:s", Locale.ENGLISH).parse(line[58].substring(0,Math.max(0, Math.min(line[58].length()-1, 19)))).getTime()));
}} else {preparedStatement.setNull(59, java.sql.Types.DATE);}
preparedStatement.addBatch();
}
preparedStatement.executeBatch();
connect.commit();
preparedStatement.close();
// note that Scanner suppresses exceptions
if (sc.ioException() != null) {
throw sc.ioException();
}
} finally {
if (inputStream != null) {
inputStream.close();
}
if (sc != null) {
sc.close();
}
if (connect != null) {
connect.close();
}
}
}
}
Upvotes: 2
Views: 614
Reputation: 123809
When performing JDBC batch inserts into MySQL over a network connection we can improve performance by adding rewriteBatchedStatements=true
to the JDBC connection URL. Without that directive, .executeBatch()
will cause MySQL Connector/J to send individual INSERT statements like
INSERT INTO tablename (field1,field2) VALUES (value1,value2)
INSERT INTO tablename (field1,field2) VALUES (value3,value4)
With rewriteBatchedStatements=true
, MySQL Connector/J will send multi-row inserts like
INSERT INTO tablename (field1, field2) VALUES (value1,value2),(value3,value4)
That can save quite a bit of bandwidth is the field list is long.
(For more details, see the related question here.)
Upvotes: 1