Reputation: 1272
I am trying to read a string dataset from an HDF5 file in C# into an array of strings. I was able to read an integer dataset using the following code:
// Read the "dimensions" dataset (the number of rows and columns) into an int array.
var datasetID = H5D.open(fileId, "dimensions");
var dataTypeId = H5D.getType(datasetID);
var dataType = H5T.getClass(dataTypeId);
// Size in bytes of ONE element of the stored type -- this is not the element count.
var length = H5T.getSize(dataTypeId);
// Size the buffer from the dataspace extent (product of all dimensions) so that it
// holds exactly as many elements as the dataset contains.
var dimensionsSpaceId = H5D.getSpace(datasetID);
long dimensionsCount = 1;
foreach (long extent in H5S.getSimpleExtentDims(dimensionsSpaceId))
{
    dimensionsCount *= extent;
}
H5S.close(dimensionsSpaceId);
int[] dDim = new int[dimensionsCount];
// NOTE(review): the file's own type is passed as the memory type while the buffer is
// int[], so this presumably expects the stored elements to be 4-byte integers -- confirm.
H5D.read(datasetID, dataTypeId, new H5Array<int>(dDim));
I tried to do the same for the string dataset, but all the values come back as null. So I referred to this link (https://www.mail-archive.com/[email protected]/msg02980.html). I was able to read the strings as bytes, but I don't know what size the byte array should be initialized to. The code I have right now to read the strings is this:
// Read the "names" dataset as raw bytes, reusing the handle variables declared earlier.
datasetID = H5D.open(fileId, "names");
var dataSpaceId = H5D.getSpace(datasetID);
long[] dims = H5S.getSimpleExtentDims(dataSpaceId);
long firstExtent = dims[0];
dataTypeId = H5T.copy(H5T.H5Type.C_S1);
// 213 is a hard-coded multiplier; the correct buffer size for the strings is not known here.
const int hardCodedMultiplier = 213;
byte[] buffer = new byte[firstExtent * hardCodedMultiplier];
Console.WriteLine(firstExtent);
var byteTarget = new H5Array<byte>(buffer);
H5D.read(datasetID, dataTypeId, byteTarget);
// Decode the whole buffer as one ASCII string and print it.
string decodedText = System.Text.ASCIIEncoding.ASCII.GetString(buffer);
Console.WriteLine(decodedText);
Upvotes: 0
Views: 3405
Reputation: 934
Your start was exceptionally helpful! With it and some help from the HDF5 example code, I was able to come up with some generic extensions that would reduce your code to:
// Read the whole "names" dataset as an array of strings via the extension method.
var datasetValue = fileId.Read1DArray<string>("names");
The extensions look something like this (which is, or should be, exactly the same as in the referenced question):
/// <summary>
/// Extension helpers that read a whole HDF5 dataset into a managed array.
/// </summary>
public static class HdfExtensions
{
    // thank you https://stackoverflow.com/questions/4133377/splitting-a-string-number-every-nth-character-number
    /// <summary>
    /// Splits <paramref name="s"/> into consecutive parts of <paramref name="partLength"/>
    /// characters; the last part is shorter when the length is not an exact multiple.
    /// </summary>
    /// <param name="s">The string to split.</param>
    /// <param name="partLength">Number of characters per part; must be positive.</param>
    /// <returns>The parts, in order. An empty string yields no parts.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="partLength"/> is not positive.</exception>
    public static IEnumerable<String> SplitInParts(this String s, Int32 partLength)
    {
        // Validate here rather than inside the iterator: an iterator body does not run
        // until the sequence is enumerated, which would delay these exceptions.
        if (s == null)
        {
            throw new ArgumentNullException("s");
        }
        if (partLength <= 0)
        {
            throw new ArgumentException("Part length has to be positive.", "partLength");
        }
        return SplitInPartsIterator(s, partLength);
    }

    // Lazily yields the parts; arguments are already validated by SplitInParts.
    private static IEnumerable<String> SplitInPartsIterator(String s, Int32 partLength)
    {
        for (var i = 0; i < s.Length; i += partLength)
        {
            yield return s.Substring(i, Math.Min(partLength, s.Length - i));
        }
    }

    // Decodes a buffer of back-to-back fixed-length ASCII strings, one string per
    // stringLength bytes, and strips the trailing '\0' padding from each element.
    private static string[] DecodeFixedLengthStrings(byte[] buffer, int stringLength)
    {
        // ASCII decoding yields exactly one char per byte, so splitting the decoded
        // text every stringLength characters matches the element boundaries.
        string text = System.Text.ASCIIEncoding.ASCII.GetString(buffer);
        return text.SplitInParts(stringLength)
                   .Select(part => part.TrimEnd('\0'))
                   .ToArray();
    }

    /// <summary>
    /// Reads a one-dimensional dataset into an array of <typeparamref name="T"/>.
    /// </summary>
    /// <remarks>
    /// When <typeparamref name="T"/> is <see cref="string"/> the dataset is read as raw
    /// bytes and decoded as fixed-length ASCII strings with trailing '\0' removed.
    /// NOTE(review): variable-length string datasets are presumably not handled by this
    /// byte-buffer approach -- confirm before relying on it for such files.
    /// All HDF5 handles opened here are closed before returning, even on failure.
    /// </remarks>
    /// <param name="fileId">The open file to read from.</param>
    /// <param name="dataSetName">Name of the dataset inside the file.</param>
    /// <returns>The dataset contents; the length is the first dataspace dimension.</returns>
    public static T[] Read1DArray<T>(this H5FileId fileId, string dataSetName)
    {
        var dataset = H5D.open(fileId, dataSetName);
        try
        {
            var space = H5D.getSpace(dataset);
            try
            {
                var dataType = H5D.getType(dataset);
                try
                {
                    var dims = H5S.getSimpleExtentDims(space);
                    if (typeof(T) == typeof(string))
                    {
                        // Bytes occupied by one stored string element.
                        int stringLength = H5T.getSize(dataType);
                        byte[] buffer = new byte[dims[0] * stringLength];
                        H5D.read(dataset, dataType, new H5Array<byte>(buffer));
                        // T is string on this path, so string[] is exactly T[].
                        return (T[])(object)DecodeFixedLengthStrings(buffer, stringLength);
                    }

                    T[] dataArray = new T[dims[0]];
                    H5D.read(dataset, dataType, new H5Array<T>(dataArray));
                    return dataArray;
                }
                finally
                {
                    H5T.close(dataType);
                }
            }
            finally
            {
                H5S.close(space);
            }
        }
        finally
        {
            H5D.close(dataset);
        }
    }

    /// <summary>
    /// Reads a two-dimensional dataset into a rectangular array of <typeparamref name="T"/>.
    /// </summary>
    /// <remarks>
    /// Strings get the same treatment as in <c>Read1DArray</c>: the dataset is read as
    /// bytes, decoded as fixed-length ASCII and trimmed of trailing '\0'. The decoded
    /// elements are placed row by row (last index varying fastest).
    /// All HDF5 handles opened here are closed before returning, even on failure.
    /// </remarks>
    /// <param name="fileId">The open file to read from.</param>
    /// <param name="dataSetName">Name of the dataset inside the file.</param>
    /// <returns>An array shaped by the first two dataspace dimensions.</returns>
    public static T[,] Read2DArray<T>(this H5FileId fileId, string dataSetName)
    {
        var dataset = H5D.open(fileId, dataSetName);
        try
        {
            var space = H5D.getSpace(dataset);
            try
            {
                var dataType = H5D.getType(dataset);
                try
                {
                    var dims = H5S.getSimpleExtentDims(space);
                    T[,] dataArray = new T[dims[0], dims[1]];
                    if (typeof(T) == typeof(string))
                    {
                        int stringLength = H5T.getSize(dataType);
                        byte[] buffer = new byte[dims[0] * dims[1] * stringLength];
                        H5D.read(dataset, dataType, new H5Array<byte>(buffer));
                        string[] flat = DecodeFixedLengthStrings(buffer, stringLength);
                        for (long row = 0; row < dims[0]; row++)
                        {
                            for (long col = 0; col < dims[1]; col++)
                            {
                                dataArray[row, col] = (T)(object)flat[row * dims[1] + col];
                            }
                        }
                        return dataArray;
                    }

                    H5D.read(dataset, dataType, new H5Array<T>(dataArray));
                    return dataArray;
                }
                finally
                {
                    H5T.close(dataType);
                }
            }
            finally
            {
                H5S.close(space);
            }
        }
        finally
        {
            H5D.close(dataset);
        }
    }
}
Upvotes: 0
Reputation: 1108
If you do not know in advance what your data type will be, try the following code. It does not cover every data type, but it is easy to extend:
/// <summary>
/// Reads a one-dimensional dataset whose element type is only known at run time.
/// </summary>
/// <remarks>
/// The dataset is read as raw bytes and then converted according to the stored type:
/// STRING becomes <c>string[]</c> (ASCII or UTF-8, trailing '\0' removed), FLOAT of
/// 4/8 bytes becomes <c>float[]</c>/<c>double[]</c>, and INTEGER of 2/4/8 bytes becomes
/// <c>Int16[]</c>/<c>Int32[]</c>/<c>Int64[]</c>. INTEGER is always mapped to a signed
/// .NET type; the sign of the stored type is not inspected.
/// All HDF5 handles opened here are closed before returning, even on failure.
/// </remarks>
/// <param name="fileId">The open file to read from.</param>
/// <param name="dataSetName">Name of the dataset inside the file.</param>
/// <returns>An array whose length is the first dataspace dimension.</returns>
/// <exception cref="NotSupportedException">
/// The stored type class, element size or character set is not one of those listed above.
/// </exception>
public static Array Read1DArray(this H5FileId fileId, string dataSetName)
{
    var dataset = H5D.open(fileId, dataSetName);
    try
    {
        var space = H5D.getSpace(dataset);
        try
        {
            var dtype = H5D.getType(dataset);
            try
            {
                var size = H5T.getSize(dtype);
                var classID = H5T.getClass(dtype);
                var extent = H5S.getSimpleExtentDims(space);
                var count = extent[0];

                // Read data into byte array
                var dataArray = new Byte[count * size];
                H5D.read(dataset, dtype, new H5Array<Byte>(dataArray));

                if (classID == H5T.H5TClass.STRING)
                {
                    // Pick the decoder once, outside the per-element loop.
                    Encoding enc;
                    switch (H5T.get_cset(dtype))
                    {
                        case H5T.CharSet.ASCII:
                            enc = new ASCIIEncoding();
                            break;
                        case H5T.CharSet.UTF8:
                            enc = new UTF8Encoding();
                            break;
                        default:
                            // Covers CharSet.ERROR and anything else: fail clearly
                            // instead of dereferencing a null encoding.
                            throw new NotSupportedException(
                                "Unsupported character set in dataset '" + dataSetName + "'.");
                    }

                    string[] stringArray = new String[count];
                    for (int i = 0; i < count; i++)
                    {
                        // Each element occupies `size` bytes; decode it in place.
                        stringArray[i] = enc.GetString(dataArray, i * size, size).TrimEnd('\0');
                    }
                    return stringArray;
                }

                // Convert types
                Type dataType = null;
                switch (classID)
                {
                    case H5T.H5TClass.FLOAT:
                        if (size == 4)
                            dataType = typeof(float);
                        else if (size == 8)
                            dataType = typeof(double);
                        break;
                    case H5T.H5TClass.INTEGER:
                        if (size == 2)
                            dataType = typeof(Int16);
                        else if (size == 4)
                            dataType = typeof(Int32);
                        else if (size == 8)
                            dataType = typeof(Int64);
                        break;
                }
                if (dataType == null)
                {
                    throw new NotSupportedException(
                        "Unsupported data type in dataset '" + dataSetName + "' (element size " + size + ").");
                }

                Array returnArray = Array.CreateInstance(dataType, count);
                // The byte buffer holds exactly count * size bytes; copy them all.
                Buffer.BlockCopy(dataArray, 0, returnArray, 0, dataArray.Length);
                return returnArray;
            }
            finally
            {
                H5T.close(dtype);
            }
        }
        finally
        {
            H5S.close(space);
        }
    }
    finally
    {
        H5D.close(dataset);
    }
}
Upvotes: 1