Reputation: 773
I use the xlsx.js library for converting excel-files to json and it works, but I have a problem when I add a hyperlink into a cell of the table as it's illustrated in the image below:
The code I use for parsing:
function readExcel(url) {
var xmlhttp = new XMLHttpRequest();
xmlhttp.open("GET", url, true);
xmlhttp.responseType = "arraybuffer";
if (xmlhttp.overrideMimeType) {
xmlhttp.overrideMimeType('text/plain; charset=x-user-defined');
}
xmlhttp.onload = function(e) {
var arraybuffer = xmlhttp.response;
var data = new Uint8Array(arraybuffer);
var arr = new Array();
for (var i = 0; i != data.length; ++i) arr[i] = String.fromCharCode(data[i]);
var wb = XLSX.read(arr.join(""), { type: "binary" });
// var sheet_name_list = wb.SheetNames;
wb.SheetNames.forEach((sheetName, index) => {
let jsonObject = XLSX.utils.sheet_to_json(wb.Sheets[sheetName], { header: 1, raw: true })
console.log("jsonObject", jsonObject)
createTable(jsonObject, index);
});
};
xmlhttp.send();
}
The current output:
[
[
"FirstName",
"LastName",
"Age",
"Country"
],
[
"Alessio",
"",
23,
"Italia"
],
[
"Mauro",
"Doe",
34,
"Germania"
],
...............
]
The problem is on Germania the hyperlink isn't present - How can I solve this problem?
Upvotes: 1
Views: 939
Reputation: 182
Great question! The xlsx
library doesn't seem to support this out of the box, but luckily it does provide a very simplistic structure of the sheet that we can interact with before converting to JSON.
The structure of each cell object is based on its content, all possible cell properties can be found in the Cell Object section of the xlsx
documentation.
In the documentation, we can see that hyperlinks are stored in the l
key of the cell object.
To access this, we need to do so before converting the sheet data to JSON.
In your original question, the workbook object can be found at wb.Sheets[sheetName]
. If we log this value out prior to converting it to JSON, it would look something like this:
{
'!ref': 'A1:D5',
A1: {
t: 's',
v: 'FirstName',
r: '<t>FirstName</t>',
h: 'FirstName',
w: 'FirstName'
},
B1: {
t: 's',
v: 'LastName',
r: '<t>LastName</t>',
h: 'LastName',
w: 'LastName'
},
C1: { t: 's', v: 'Age', r: '<t>Age</t>', h: 'Age', w: 'Age' },
D1: {
t: 's',
v: 'Country',
r: '<t>Country</t>',
h: 'Country',
w: 'Country'
},
A2: {
t: 's',
v: 'Alessio',
r: '<t>Alessio</t>',
h: 'Alessio',
w: 'Alessio'
},
C2: { t: 'n', v: 23, w: '23' },
D2: { t: 's', v: 'Italia', r: '<t>Italia</t>', h: 'Italia', w: 'Italia' },
A3: { t: 's', v: 'Mauro', r: '<t>Mauro</t>', h: 'Mauro', w: 'Mauro' },
B3: { t: 's', v: 'Doe', r: '<t>Doe</t>', h: 'Doe', w: 'Doe' },
C3: { t: 'n', v: 34, w: '34' },
D3: {
t: 's',
v: 'Germania',
r: '<t>Germania</t>',
h: 'Germania',
w: 'Germania',
l: {
ref: 'D3',
id: 'rId1',
uid: '{F50A35D6-56AF-4849-B5E2-A938EFDD7328}',
Target: 'https://germania.com/',
Rel: [Object]
}
},
A4: {
t: 's',
v: 'Giovanni',
r: '<t>Giovanni</t>',
h: 'Giovanni',
w: 'Giovanni'
},
B4: { t: 's', v: 'Zonzo', r: '<t>Zonzo</t>', h: 'Zonzo', w: 'Zonzo' },
C4: { t: 'n', v: 55, w: '55' },
D4: {
t: 's',
v: 'Polonia',
r: '<t>Polonia</t>',
h: 'Polonia',
w: 'Polonia'
},
A5: { t: 's', v: 'Nicola', r: '<t>Nicola</t>', h: 'Nicola', w: 'Nicola' },
B5: { t: 's', v: 'Fogo', r: '<t>Fogo</t>', h: 'Fogo', w: 'Fogo' },
C5: { t: 'n', v: 22, w: '22' },
D5: {
t: 's',
v: 'Francia',
r: '<t>Francia</t>',
h: 'Francia',
w: 'Francia'
},
'!margins': {
left: 0.7,
right: 0.7,
top: 0.75,
bottom: 0.75,
header: 0.3,
footer: 0.3
}
}
As you can see, in cell D3
an l
key is included because the cell is formatted as a hyperlink. The actual hyperlink value can be found inside of Target
within the l
object.
Instead of creating our own parser to convert this data to JSON, all we need to do is modify the original workbook object and then convert it to JSON after the fact.
First, we get a list of all of the cells by using Object.keys()
and then can loop over that list to check if a hyperlink is present or not in the current cell.
const sheet = workbook.Sheets['Sheet1'];
const cells = Object.keys(sheet);
If a hyperlink is present, we will overwrite the v
key inside of the workbook object for that cell which usually represents the raw
value from the cell.
cells.forEach(cell => {
// If cell contains a hyperlink, overwrite the raw value with the hyperlink
if (sheet[cell].l && sheet[cell].l.Target) {
sheet[cell].v = sheet[cell].l.Target;
}
});
Once we've overwritten the v
key with the hyperlink values, we can then convert the sheet to JSON like usual.
const json = xlsx.utils.sheet_to_json(sheet, { header: 1, raw: true });
Our result should now come out looking something like this:
[
[ 'FirstName', 'LastName', 'Age', 'Country' ],
[ 'Alessio', null, 23, 'Italia' ],
[ 'Mauro', 'Doe', 34, 'https://germania.com/' ],
[ 'Giovanni', 'Zonzo', 55, 'Polonia' ],
[ 'Nicola', 'Fogo', 22, 'Francia' ]
]
Upvotes: 2