Reputation: 1772
I have downloaded this project from here -->> http://code.msdn.microsoft.com/office/How-to-convert-excel-file-7a9bb404
This one works fine and it's a great commented code to understand even for beginners like myself, but it only works with one table per sheet. Once I add second table in sheet, it throws an error that something is wrong in one of the columns in my spreadsheet: It says: " Error occurs! The error message is: Cannot find column 4. "
Basically, I have this type of tables in my spreadsheet:
So I want that my program would export those two tables in a single .XML file (just read them both). And those two tables should be separated in two XML childs: the upper one should be Order-Header and the lower one - Line-Items, like this:
<ROOT>
<Order-Header>
.....
</Order-Header>
<Line-Items>
.....
</Line-Items>
</ROOT>
This is the code this program has:
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
namespace CSOpenXmlExcelToXml
{
public class ConvertExcelToXml
{
/// <summary>
/// Read Data from selected excel file into DataTable
/// </summary>
/// <param name="filename">Excel File Path</param>
/// <returns></returns>
private DataTable ReadExcelFile(string filename)
{
// Initialize an instance of DataTable
DataTable dt = new DataTable("Line-Item");
try
{
// Use SpreadSheetDocument class of Open XML SDK to open excel file
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(filename, false))
{
// Get Workbook Part of Spread Sheet Document
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
// Get all sheets in spread sheet document
IEnumerable<Sheet> sheetcollection = spreadsheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
// Get relationship Id
string relationshipId = sheetcollection.First().Id.Value;
// Get sheet1 Part of Spread Sheet Document
WorksheetPart worksheetPart = (WorksheetPart)spreadsheetDocument.WorkbookPart.GetPartById(relationshipId);
// Get Data in Excel file
SheetData sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
IEnumerable<Row> rowcollection = sheetData.Descendants<Row>();
// If there is no rows in the spreadsheet at all, when just return in how it is and output it
if (rowcollection.Count() == 0)
{
return dt;
}
// Add columns
foreach (Cell cell in rowcollection.ElementAt(0))
{
dt.Columns.Add(GetValueOfCell(spreadsheetDocument, cell));
}
// Add rows into DataTable
foreach (Row row in rowcollection)
{
// Create temporary row to read rows in spreadsheet
DataRow temprow = dt.NewRow();
int columnIndex = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
// Get Cell Column Index
int cellColumnIndex = GetColumnIndex(GetColumnName(cell.CellReference));
if (columnIndex < cellColumnIndex)
{
do
{
temprow[columnIndex] = string.Empty;
columnIndex++;
}
while (columnIndex < cellColumnIndex);
}
temprow[columnIndex] = GetValueOfCell(spreadsheetDocument, cell);
columnIndex++;
}
// Add the row to DataTable
// the rows include header row
dt.Rows.Add(temprow);
}
}
// Here remove header row
dt.Rows.RemoveAt(0);
return dt;
}
// Throw error message
catch (IOException ex)
{
throw new IOException(ex.Message);
}
}
/// <summary>
/// Get Value of Cell
/// </summary>
/// <param name="spreadsheetdocument">SpreadSheet Document Object</param>
/// <param name="cell">Cell Object</param>
/// <returns>The Value in Cell</returns>
private static string GetValueOfCell(SpreadsheetDocument spreadsheetdocument, Cell cell)
{
// Get value in Cell
SharedStringTablePart sharedString = spreadsheetdocument.WorkbookPart.SharedStringTablePart;
if (cell.CellValue == null)
{
// If there is no data in cell, just return it
return string.Empty;
}
string cellValue = cell.CellValue.InnerText;
// The condition that the Cell DataType is SharedString
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return sharedString.SharedStringTable.ChildElements[int.Parse(cellValue)].InnerText;
}
else
{
return cellValue;
}
}
/// <summary>
/// Get Column Name From given cell name
/// </summary>
/// <param name="cellReference">Cell Name(For example,A1)</param>
/// <returns>Column Name(For example, A)</returns>
private string GetColumnName(string cellReference)
{
// Create a regular expression to match the column name of cell
Regex regex = new Regex("[A-Za-z]+");
Match match = regex.Match(cellReference);
return match.Value;
}
/// <summary>
/// Get Index of Column from given column name
/// </summary>
/// <param name="columnName">Column Name(For Example,A or AA)</param>
/// <returns>Column Index</returns>
private int GetColumnIndex(string columnName)
{
int columnIndex = 0;
int factor = 1;
// From right to left
for (int position = columnName.Length - 1; position >= 0; position--)
{
// For letters
if (Char.IsLetter(columnName[position]))
{
columnIndex += factor * ((columnName[position] - 'A') + 1) - 1;
factor *= 26;
}
}
return columnIndex;
}
/// <summary>
/// Convert DataTable to Xml format
/// </summary>
/// <param name="filename">Excel File Path</param>
/// <returns>Xml format string</returns>
public string GetXML(string filename)
{
using (DataSet ds = new DataSet())
{
ds![enter image description here][2].DataSetName = "Document-Order";
ds.Tables.Add(this.ReadExcelFile(filename));a
return ds.GetXml();
}
}
}
}
P.S. I copied all the code because I think it is the easier way to spot where to change it, to read those two tables in one spreadsheet and export them both in one XML file.
Would appreciate any help or just some ideas, how could I archieve this kind of functionality. Thank you.
UPDATE This is MainForm.cs code:
using System;
using System.IO;
using System.Windows.Forms;
namespace CSOpenXmlExcelToXml
{
public partial class MainForm : Form
{
public MainForm()
{
InitializeComponent();
this.btnSaveAs.Enabled = false;
}
/// <summary>
/// Open an dialog to let users select Excel file
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void btnBrowser_Click(object sender, EventArgs e)
{
// Initializes a OpenFileDialog instance
using (OpenFileDialog openfileDialog = new OpenFileDialog())
{
openfileDialog.RestoreDirectory = true;
openfileDialog.Filter = "Excel files(*.xlsx;*.xls)|*.xlsx;*.xls";
if (openfileDialog.ShowDialog() == DialogResult.OK)
{
tbExcelName.Text = openfileDialog.FileName;
}
}
}
/// <summary>
/// Convert Excel file to Xml format and view in Listbox control
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void btnConvert_Click(object sender, EventArgs e)
{
tbXmlView.Clear();
string excelfileName = tbExcelName.Text;
if (string.IsNullOrEmpty(excelfileName) || !File.Exists(excelfileName))
{
MessageBox.Show("The Excel file is invalid! Please select a valid file.");
return;
}
try
{
string xmlFormatstring = new ConvertExcelToXml().GetXML(excelfileName);
if (string.IsNullOrEmpty(xmlFormatstring))
{
// Line just for checking, if Excel document is empty. If it's true, when just print out an error message
MessageBox.Show("The content of Excel file is Empty!");
return;
}
// Print it in TextView
tbXmlView.Text = xmlFormatstring;
// If txbXmlView has text, set btnSaveAs button to be enable
btnSaveAs.Enabled = true;
}
catch (Exception ex)
{
// General error message checking for errors
MessageBox.Show("Error occurs! The error message is: " +ex.Message);
}
}
/// <summary>
/// Save the XMl format string as Xml file
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void btnSaveAs_Click(object sender, EventArgs e)
{
// Initializes a SaveFileDialog instance
using (SaveFileDialog savefiledialog = new SaveFileDialog())
{
savefiledialog.RestoreDirectory = true;
savefiledialog.DefaultExt = "xml";
savefiledialog.Filter = "All Files(*.xml)|*.xml";
if (savefiledialog.ShowDialog() == DialogResult.OK)
{
Stream filestream = savefiledialog.OpenFile();
StreamWriter streamwriter = new StreamWriter(filestream);
streamwriter.Write("<?xml version='1.0'?>" +
Environment.NewLine + tbXmlView.Text);
streamwriter.Close();
}
}
}
// Disable maximize button of the form
private void MainForm_Load(object sender, EventArgs e)
{
this.MaximizeBox = false; //this is an instance of Form or its decendant
}
}
}
Upvotes: 1
Views: 2491
Reputation: 9861
The problem that you are encountering is that that code assumes that your data is in a single table, I guess it bombs out at either the end of the last column of the top table, or its last row.
The Open XML API is not easy to work with. Most people who have worked with it agree that. You need to do a lot just to get to the stage where you can do pretty basic stuff.
I would suggest you try to use a library that solves some of the issues with the raw API for you.
Why don't you look at ClosedXML? It is not brilliant and has some annoying bugs when you start manipulating files, but for reading data it should be fine.
Hopefully this page in the documentation should push you in the right direction.
Upvotes: 1