Reputation: 4966
I am trying to read Excel files via OpenXML and looking to output into a CSV. Currently it reads each cell in a separate line (due to writeline) or a single line (when using write). What is the best way to read and outputing in a tabular format like in Excel? Is there a built in feature in OpenXML which I can leverage for this?
static void Main(string[] args) { String xlDocName = @"C:\Users\xlp111\source.xlsx"; using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(xlDocName, false)) { WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart; string cellValue = string.Empty; foreach(WorksheetPart worksheetPart in workbookPart.WorksheetParts) { OpenXmlReader reader = OpenXmlReader.Create(worksheetPart); while (reader.Read()) { if (reader.ElementType == typeof(Row)) { reader.ReadFirstChild(); do { if (reader.ElementType == typeof(Cell)) { Cell c = (Cell)reader.LoadCurrentElement(); if (c.DataType != null && c.DataType == CellValues.SharedString) { SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText)); cellValue = ssi.Text.Text; Console.WriteLine(cellValue); } } } while (reader.ReadNextSibling()); } } } Console.ReadLine(); } } }
Upvotes: 3
Views: 23698
Reputation: 2809
SOLUTION: (based on solution by @M.Hassan)
This handles the extra delimiter at the end of line.
public class OpenXmlExcel {
public void ExcelToCsv (string source, string target, string delimiter = ",", bool isFirstRowHeader = true) {
var dt = ReadExcelSheet (source, isFirstRowHeader);
DatatableToCsv (dt, target, delimiter, true);
}
private void DatatableToCsv (DataTable dt, string fname, string delimiter = ",", bool isHeaderRequired = true) {
using (StreamWriter writer = new StreamWriter (fname)) {
StringBuilder sb = new ();
if (isHeaderRequired) {
for (int i = 0; i < dt.Columns.Count; i++) { // Headers
sb.Append (dt.Columns[i].ColumnName);
if (i + 1 != dt.Columns.Count) sb.Append (delimiter);
}
writer.WriteLine (sb.ToString ());
}
for (int i = 0; i < dt.Rows.Count; i++) { // Values
var row = dt.Rows[i];
int colCount = row.ItemArray.Length;
sb.Clear ();
for (int j = 0; j < colCount; j++) {
sb.Append (row.ItemArray[j].ToString ());
if (j + 1 != colCount) sb.Append (delimiter);
}
writer.WriteLine (sb.ToString ());
}
}
}
private DataTable ReadExcelSheet (string fname, bool isFirstRowHeader) {
DataTable dt = new DataTable ();
fname = Path.GetFullPath (fname);
using (SpreadsheetDocument doc = SpreadsheetDocument.Open (fname, false)) {
//Read the first Sheets
Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet> ();
Worksheet worksheet = (doc.WorkbookPart.GetPartById (sheet.Id.Value) as WorksheetPart).Worksheet;
IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData> ().Descendants<Row> ();
foreach (Row row in rows) {
//Read the first row as header
if (row.RowIndex.Value == 1) {
var j = 1;
foreach (Cell cell in row.Descendants<Cell> ()) {
var colunmName = isFirstRowHeader ? GetCellValue (doc, cell) : "Field" + j++;
Console.WriteLine (colunmName); // Headers could be added here as well. eg: class level scoped List<string>
dt.Columns.Add (colunmName);
}
} else {
dt.Rows.Add ();
int i = 0;
foreach (Cell cell in row.Descendants<Cell> ()) dt.Rows[dt.Rows.Count - 1][i++] = GetCellValue (doc, cell);
}
}
}
return dt;
}
private string GetCellValue (SpreadsheetDocument doc, Cell cell) {
string value = cell.CellValue.InnerText;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem (int.Parse (value)).InnerText;
return value;
}
}
Upvotes: 0
Reputation: 11032
Edit:
Using Open XML SDK for Microsoft Office
install V2 from :https://www.microsoft.com/en-eg/download/details.aspx?id=5124&wa=wsignin1.0
(or V2.5)
The following class convert excel sheet to CSV file with delimeter
//reference library
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
public class OpenXmlExcel
{
public void ExcelToCsv(string source, string target, string delimiter = ";", bool firstRowIsHeade = true)
{
var dt = ReadExcelSheet(source, firstRowIsHeade);
DatatableToCsv(dt, target, delimiter);
}
private void DatatableToCsv(DataTable dt, string fname, string delimiter = ";")
{
using (StreamWriter writer = new StreamWriter(fname))
{
foreach (DataRow row in dt.AsEnumerable())
{
writer.WriteLine(string.Join(delimiter, row.ItemArray.Select(x => x.ToString())) + delimiter);
}
}
}
List<string> Headers = new List<string>();
private DataTable ReadExcelSheet(string fname, bool firstRowIsHeade)
{
DataTable dt = new DataTable();
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fname, false))
{
//Read the first Sheets
Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
Worksheet worksheet = (doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart).Worksheet;
IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();
foreach (Row row in rows)
{
//Read the first row as header
if (row.RowIndex.Value == 1)
{
var j = 1;
foreach (Cell cell in row.Descendants<Cell>())
{
var colunmName = firstRowIsHeade ? GetCellValue(doc, cell) : "Field" + j++;
Console.WriteLine(colunmName);
Headers.Add(colunmName);
dt.Columns.Add(colunmName);
}
}
else
{
dt.Rows.Add();
int i = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
dt.Rows[dt.Rows.Count - 1][i] = GetCellValue(doc, cell);
i++;
}
}
}
}
return dt;
}
private string GetCellValue(SpreadsheetDocument doc, Cell cell)
{
string value = cell.CellValue.InnerText;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(int.Parse(value)).InnerText;
}
return value;
}
}
How to use:
new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv",";",true);
or
//use default: separator=";" ,first row is header
new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv");
Upvotes: 7