user8241949
user8241949

Reputation:

Importing large .xlsx file very slow

I'm new to c# and WPF and trying to import a large .xlsx file into a datagrid, i can have around 200+ columns & 100,000+ rows. With my current method it is taking over an hour (i didn't let it finish). An example of my format in csv terms would be;

"Time","Dist","V_Front","V_Rear","RPM"
"s","m","km/h","km/h","rpm"
"0.000","0","30.3","30.0","11995"
"0.005","0","30.3","30.0","11965"
"0.010","0","30.3","31.0","11962"

I'm using Interop at the moment but i'm wondering whether there is another approach which would drastically cut down load time. I hope to plot this data using SciCharts (they have a student licence), with check boxes for channel selection but that's another matter.

.CS

    private void Button_Click(object sender, RoutedEventArgs e)
    {
        OpenFileDialog openfile = new OpenFileDialog();
        openfile.DefaultExt = ".xlsx";
        openfile.Filter = "(.xlsx)|*.xlsx";

        var browsefile = openfile.ShowDialog();

        if (browsefile == true)
        {
            txtFilePath.Text = openfile.FileName;

            Microsoft.Office.Interop.Excel.Application excelApp = new Microsoft.Office.Interop.Excel.Application();
            Microsoft.Office.Interop.Excel.Workbook excelBook = excelApp.Workbooks.Open(txtFilePath.Text.ToString(), 0, true, 5, "", "", true, Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
            Microsoft.Office.Interop.Excel.Worksheet excelSheet = (Microsoft.Office.Interop.Excel.Worksheet)excelBook.Worksheets.get_Item(1); ;
            Microsoft.Office.Interop.Excel.Range excelRange = excelSheet.UsedRange;

            string strCellData = "";
            double douCellData;
            int rowCnt = 0;
            int colCnt = 0;

            DataTable dt = new DataTable();
            for (colCnt = 1; colCnt <= excelRange.Columns.Count; colCnt++)
            {
                string strColumn = "";
                strColumn = (string)(excelRange.Cells[1, colCnt] as Microsoft.Office.Interop.Excel.Range).Value2;
                dt.Columns.Add(strColumn, typeof(string));
            }

            for (rowCnt = 2; rowCnt <= excelRange.Rows.Count; rowCnt++)
            {
                string strData = "";
                for (colCnt = 1; colCnt <= excelRange.Columns.Count; colCnt++)
                {
                    try
                    {
                        strCellData = (string)(excelRange.Cells[rowCnt, colCnt] as Microsoft.Office.Interop.Excel.Range).Value2;
                        strData += strCellData + "|";
                    }
                    catch (Exception ex)
                    {
                        douCellData = (excelRange.Cells[rowCnt, colCnt] as Microsoft.Office.Interop.Excel.Range).Value2;
                        strData += douCellData.ToString() + "|";
                    }
                }
                strData = strData.Remove(strData.Length - 1, 1);
                dt.Rows.Add(strData.Split('|'));
            }

            dtGrid.ItemsSource = dt.DefaultView;

            excelBook.Close(true, null, null);
            excelApp.Quit();


        }
    }

Any help i really appreciated.

Upvotes: 0

Views: 1169

Answers (1)

Szabolcs D&#233;zsi
Szabolcs D&#233;zsi

Reputation: 8843

The problem is that there is too many individual reads which causes a lot of reflection usage and marshalling between Excel and your application. If you're not concerned about memory usage, you can just read the whole Range into memory and work from memory instead of individually reading cells. The below code runs in 3880 ms on a test file with 5 columns and 103938 rows:

OpenFileDialog openfile = new OpenFileDialog();
openfile.DefaultExt = ".xlsx";
openfile.Filter = "(.xlsx)|*.xlsx";

var browsefile = openfile.ShowDialog();

if (browsefile == true)
{
    txtFilePath.Text = openfile.FileName;

    var excelApp = new Microsoft.Office.Interop.Excel.Application();
    var excelBook = excelApp.Workbooks.Open(txtFilePath.Text, 0, true, 5, "", "", true,
        Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
    var excelSheet = (Microsoft.Office.Interop.Excel.Worksheet) excelBook.Worksheets.Item[1];

    Microsoft.Office.Interop.Excel.Range excelRange = excelSheet.UsedRange;

    DataTable dt = new DataTable();

    object[,] value = excelRange.Value;

    int columnsCount = value.GetLength(1);
    for (var colCnt = 1; colCnt <= columnsCount; colCnt++)
    {
        dt.Columns.Add((string)value[1, colCnt], typeof(string));
    }

    int rowsCount = value.GetLength(0);
    for (var rowCnt = 2; rowCnt <= rowsCount; rowCnt++)
    {
        var dataRow = dt.NewRow();
        for (var colCnt = 1; colCnt <= columnsCount; colCnt++)
        {
            dataRow[colCnt - 1] = value[rowCnt, colCnt];
        }
        dt.Rows.Add(dataRow);
    }

    dtGrid.ItemsSource = dt.DefaultView;

    excelBook.Close(true);
    excelApp.Quit();
}

If you don't want to read the whole Range in, then you should do that in sensible batches.

Another optimization is to run this on a background thread, so it won't block the UI while it's loading.

Edit

For running this on a background thread you could modify the button click handler to be an async method and put the parsing logic into another method which runs the actual parsing on a threadpool thread with Task.Run:

private async void Button_Click(object sender, RoutedEventArgs e)
{
    OpenFileDialog openfile = new OpenFileDialog();
    openfile.DefaultExt = ".xlsx";
    openfile.Filter = "(.xlsx)|*.xlsx";

    var browsefile = openfile.ShowDialog();

    if (browsefile == true)
    {
        txtFilePath.Text = openfile.FileName;

        DataTable dataTable = await ParseExcel(txtFilePath.Text).ConfigureAwait(true);

        dtGrid.ItemsSource = dataTable.DefaultView;
    }
}

private Task<DataTable> ParseExcel(string filePath)
{
    return Task.Run(() =>
    {
        var excelApp = new Microsoft.Office.Interop.Excel.Application();
        var excelBook = excelApp.Workbooks.Open(filePath, 0, true, 5, "", "", true,
            Microsoft.Office.Interop.Excel.XlPlatform.xlWindows, "\t", false, false, 0, true, 1, 0);
        var excelSheet = (Microsoft.Office.Interop.Excel.Worksheet) excelBook.Worksheets.Item[1];

        Microsoft.Office.Interop.Excel.Range excelRange = excelSheet.UsedRange;

        DataTable dt = new DataTable();

        object[,] value = excelRange.Value;

        int columnsCount = value.GetLength(1);
        for (var colCnt = 1; colCnt <= columnsCount; colCnt++)
        {
            dt.Columns.Add((string) value[1, colCnt], typeof(string));
        }

        int rowsCount = value.GetLength(0);
        for (var rowCnt = 2; rowCnt <= rowsCount; rowCnt++)
        {
            var dataRow = dt.NewRow();
            for (var colCnt = 1; colCnt <= columnsCount; colCnt++)
            {
                dataRow[colCnt - 1] = value[rowCnt, colCnt];
            }
            dt.Rows.Add(dataRow);
        }

        excelBook.Close(true);
        excelApp.Quit();

        return dt;
    });
}

The handler just invokes the parsing function, the parsing function runs on a background thread and when it finishes the handler can continue by assigning the resulting DataTable to the ItemsSource.

Upvotes: 1

Related Questions