Alex Tonkonozhenko
Alex Tonkonozhenko

Reputation: 1574

Export from MySQL to Neo4j

I have the following problem.
I have a big database in MySQL containing all the pages of Wikipedia.
I want to build a graph of all pages (nearly 1M pages that are real pages, not redirects, etc.) and their links to other pages.
For this purpose I want to use Neo4j. I tried to do it with the following code:

    var neo4j = require('neo4j');
var mysql = require('mysql');

// Keep the module and the database handle in separate variables.
var graphDb = new neo4j.GraphDatabase('http://localhost:7474');

var connection = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: '******',
    database: 'wiki-wizualizer'
});
connection.connect();

// Number of rows fetched from MySQL (and nodes saved to Neo4j) per batch.
var PER_PAGE = 10;
// Only real articles: no redirects, main namespace only.
var PAGE_FILTER = "page.page_is_redirect = false AND page.page_namespace = 0";

/**
 * Stops the import and releases the MySQL connection.
 * Called exactly once, either when all pages are imported or on the first error.
 *
 * @param {Error|null} err - the error that aborted the import, or null on success
 */
function finish(err) {
    if (err) {
        console.error(err);
        process.exitCode = 1;
        // After an error the connection may no longer accept a graceful quit.
        connection.destroy();
        return;
    }
    connection.end();
}

/**
 * Creates one Neo4j node per row and calls `done` once every save has completed.
 *
 * @param {Array<{page_id: number, title: string}>} rows - one batch of pages
 * @param {function(Error|null)} done - receives the first save error, if any
 */
function saveBatch(rows, done) {
    var pending = rows.length;
    var firstError = null;

    if (pending === 0) {
        done(null);
        return;
    }

    rows.forEach(function (row) {
        var node = graphDb.createNode({title: row.title});
        node.save(function (err) {
            if (err && !firstError) {
                firstError = err;
            }
            pending -= 1;
            if (pending === 0) {
                done(firstError);
            }
        });
    });
}

/**
 * Imports the batch of pages whose ids follow `lastPageId`, then continues with
 * the next batch. Paging by "id greater than the last one seen" visits every
 * matching page even though page ids are not contiguous.
 *
 * The next batch is requested from an asynchronous callback, so the call stack
 * does not grow with the number of batches.
 *
 * @param {number} lastPageId - highest page_id already imported (0 to start)
 * @param {number} imported - number of pages imported so far
 * @param {number} total - total number of pages to import, for progress output
 */
function importFrom(lastPageId, imported, total) {
    // Query for selecting vertices
    var q = "SELECT page.page_id AS page_id, page.page_title AS title FROM page " +
        "WHERE page.page_id > ? AND " + PAGE_FILTER + " " +
        "ORDER BY page_id LIMIT ?";

    connection.query(q, [lastPageId, PER_PAGE], function (err, rows) {
        if (err) {
            finish(err);
            return;
        }
        if (rows.length === 0) {
            // No pages left: the import is complete.
            finish(null);
            return;
        }

        saveBatch(rows, function (saveErr) {
            if (saveErr) {
                finish(saveErr);
                return;
            }

            var done = imported + rows.length;
            // Progress
            console.log('' + (done / total * 100).toFixed(2) + '%');

            importFrom(rows[rows.length - 1].page_id, done, total);
        });
    });
}

// Number of pages from DB (used only for the progress display)
connection.query("SELECT COUNT(1) AS number FROM page WHERE " + PAGE_FILTER, function (err, rows) {
    if (err) {
        finish(err);
        return;
    }
    importFrom(0, 0, rows[0].number);
});

But it takes too much time (it creates only 700 vertices per minute).
I store all the data (MySQL and Neo4j) on a single HDD.
Is there any way to make it faster?
Will it help if I move Neo4j to an SSD?

Upvotes: 1

Views: 796

Answers (1)

Michael Hunger
Michael Hunger

Reputation: 41676

Check out: http://neo4j.org/develop/import

Especially the CSV batch-importer: it should make it easy to generate the CSV files that represent your nodes and relationships and then import them directly.

For more info on importing data with cypher, check out: http://jexp.de/blog/2013/05/on-importing-data-in-neo4j-blog-series/

Upvotes: 2

Related Questions