user793468
user793468

Reputation: 4966

Reading Excel files using OpenXML

I am trying to read Excel files via OpenXML and looking to output into a CSV. Currently it reads each cell in a separate line (due to writeline) or a single line (when using write). What is the best way to read and outputing in a tabular format like in Excel? Is there a built in feature in OpenXML which I can leverage for this?

static void Main(string[] args)
{
        String xlDocName = @"C:\Users\xlp111\source.xlsx";

        using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(xlDocName, false))
        {
            WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
            string cellValue = string.Empty;
            foreach(WorksheetPart worksheetPart in workbookPart.WorksheetParts)
            {
                OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);

                while (reader.Read())
                {
                if (reader.ElementType == typeof(Row))
                {
                    reader.ReadFirstChild();

                    do
                    {
                    if (reader.ElementType == typeof(Cell))
                    {
                        Cell c = (Cell)reader.LoadCurrentElement();

                        if (c.DataType != null && c.DataType == CellValues.SharedString)
                        {
                        SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
                        cellValue = ssi.Text.Text;
                        Console.WriteLine(cellValue);
                        }
                    }
                    }                      
                    while (reader.ReadNextSibling());
                }
                }
            }
            Console.ReadLine();
        }
    }
}

Upvotes: 3

Views: 23698

Answers (2)

Naveen Kumar V
Naveen Kumar V

Reputation: 2809

SOLUTION: (based on solution by @M.Hassan)

This handles the extra delimiter at the end of line.

public class OpenXmlExcel {
  public void ExcelToCsv (string source, string target, string delimiter = ",", bool isFirstRowHeader = true) {
     var dt = ReadExcelSheet (source, isFirstRowHeader);
     DatatableToCsv (dt, target, delimiter, true);
  }

  private void DatatableToCsv (DataTable dt, string fname, string delimiter = ",", bool isHeaderRequired = true) {
     using (StreamWriter writer = new StreamWriter (fname)) {
        StringBuilder sb = new ();
        if (isHeaderRequired) {
           for (int i = 0; i < dt.Columns.Count; i++) { // Headers
              sb.Append (dt.Columns[i].ColumnName);
              if (i + 1 != dt.Columns.Count) sb.Append (delimiter);
           }
           writer.WriteLine (sb.ToString ());
        }
        for (int i = 0; i < dt.Rows.Count; i++) { // Values
           var row = dt.Rows[i];
           int colCount = row.ItemArray.Length;
           sb.Clear ();
           for (int j = 0; j < colCount; j++) {
              sb.Append (row.ItemArray[j].ToString ());
              if (j + 1 != colCount) sb.Append (delimiter);
           }
           writer.WriteLine (sb.ToString ());
        }
     }
  }

  private DataTable ReadExcelSheet (string fname, bool isFirstRowHeader) {
     DataTable dt = new DataTable ();
     fname = Path.GetFullPath (fname);
     using (SpreadsheetDocument doc = SpreadsheetDocument.Open (fname, false)) {
        //Read the first Sheets 
        Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet> ();
        Worksheet worksheet = (doc.WorkbookPart.GetPartById (sheet.Id.Value) as WorksheetPart).Worksheet;
        IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData> ().Descendants<Row> ();
        foreach (Row row in rows) {
           //Read the first row as header
           if (row.RowIndex.Value == 1) {
              var j = 1;
              foreach (Cell cell in row.Descendants<Cell> ()) {
                 var colunmName = isFirstRowHeader ? GetCellValue (doc, cell) : "Field" + j++;
                 Console.WriteLine (colunmName); // Headers could be added here as well. eg: class level scoped List<string>
                 dt.Columns.Add (colunmName);
              }
           } else {
              dt.Rows.Add ();
              int i = 0;
              foreach (Cell cell in row.Descendants<Cell> ()) dt.Rows[dt.Rows.Count - 1][i++] = GetCellValue (doc, cell);
           }
        }
     }
     return dt;
  }

  private string GetCellValue (SpreadsheetDocument doc, Cell cell) {
     string value = cell.CellValue.InnerText;
     if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
        return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem (int.Parse (value)).InnerText;
     return value;
  }
}

Upvotes: 0

M.Hassan
M.Hassan

Reputation: 11032

Edit:

Using Open XML SDK for Microsoft Office

install V2 from :https://www.microsoft.com/en-eg/download/details.aspx?id=5124&wa=wsignin1.0

(or V2.5)

The following class convert excel sheet to CSV file with delimeter

 //reference library
 using DocumentFormat.OpenXml.Packaging;
 using DocumentFormat.OpenXml.Spreadsheet;


public class OpenXmlExcel
{
public void ExcelToCsv(string source, string target, string delimiter = ";", bool firstRowIsHeade = true)
{
    var dt = ReadExcelSheet(source, firstRowIsHeade);
    DatatableToCsv(dt, target, delimiter);

}

private void DatatableToCsv(DataTable dt, string fname, string delimiter = ";")
{

    using (StreamWriter writer = new StreamWriter(fname))
    {
        foreach (DataRow row in dt.AsEnumerable())
        {
            writer.WriteLine(string.Join(delimiter, row.ItemArray.Select(x => x.ToString())) + delimiter);
        }
    }

}

List<string> Headers = new List<string>();


private DataTable ReadExcelSheet(string fname, bool firstRowIsHeade)
{

    DataTable dt = new DataTable();
    using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fname, false))
    {
        //Read the first Sheets 
        Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
        Worksheet worksheet = (doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart).Worksheet;
        IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();

        foreach (Row row in rows)
        {
            //Read the first row as header
            if (row.RowIndex.Value == 1)
            {
                var j = 1;
                foreach (Cell cell in row.Descendants<Cell>())
                {
                    var colunmName = firstRowIsHeade ? GetCellValue(doc, cell) : "Field" + j++;
                    Console.WriteLine(colunmName);
                    Headers.Add(colunmName);
                    dt.Columns.Add(colunmName);
                }
            }
            else
            {
                dt.Rows.Add();
                int i = 0;
                foreach (Cell cell in row.Descendants<Cell>())
                {
                    dt.Rows[dt.Rows.Count - 1][i] = GetCellValue(doc, cell);
                    i++;
                }
            }
        }

    }
    return dt;
}

private string GetCellValue(SpreadsheetDocument doc, Cell cell)
{
    string value = cell.CellValue.InnerText;
    if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
    {
        return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(int.Parse(value)).InnerText;
    }
    return value;
 }
}

How to use:

new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv",";",true);
or
//use default: separator=";" ,first row is header
new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv");

Upvotes: 7

Related Questions