How to create attributes to the groups and access them in hdf5 file system?

Question

I want to create two groups in the HDF5 file: the /h5md group (see the group description) and the /particles/lipids group (see the group2 description). The former consists only of a direct attribute 'version' (=1.0) and two groups, creator and author, with their attributes, so there are no datasets here.

In the /particles/lipids group, the only missing bit is the box group (see the box group description). The minimal information consists of two attributes: dimension (=3) and the boundary conditions, e.g., the string array ("none", "none", "none"). In our case, we actually have periodic boundaries, so the string array should be ("periodic", "periodic", "periodic") and the dataset 'edges' must be provided. The box size is given in the input file in the last line of each frame; it is something like 61.42836 61.42836 8.47704 and changes slightly in the course of the simulation. This means that the edges dataset is time-dependent as well, i.e., it has maxshape=(None, 3).

I guess the problem is clearly defined. I need to create these two groups according to the description. I have created the first and second groups; see the code below. I have also given an attribute to the version group in /h5md. The code works fine, but when I try to access the attribute, it shows nothing in there!

import struct
import numpy as np
import h5py
import re

# Part 1: convert the fixed-width .gro trajectory text file into an HDF5 (.h5) file.
csv_file = 'com'
# Fixed column widths (in bytes) of one particle line; only the last three
# fields are stored per particle below.
fmtstring = '7s 8s 5s 7s 7s 7s'
fieldstruct = struct.Struct(fmtstring)
parse = fieldstruct.unpack_from

# The frame header carries the simulation time after "t="; compile the pattern
# once instead of handing the pattern string to re.search for every frame.
time_pattern = re.compile(r"t= *(.*)$")

# np.dtype describing a full particle row (hard-coded for now; not used below)
gro_dt = np.dtype([('col1', 'S7'), ('col2', 'S8'), ('col3', int),
                   ('col4', float), ('col5', float), ('col6', float)])

with open(csv_file, 'r') as f, \
    h5py.File('xaa.h5', 'w') as hdf:

    # group holding the position data
    particles_grp = hdf.require_group('particles/lipids/positions')
    # NOTE: require_group creates every intermediate group of the path, so this
    # builds the nested chain h5md -> version -> author -> creator and returns
    # the innermost group. The 'version' attribute therefore lives on
    # 'h5md/version/author/creator' and must be read back from that same path.
    h5md_grp = hdf.require_group('h5md/version/author/creator')
    h5md_grp.attrs['version'] = 1.0
    # resizable 1-D datasets, grown by one entry per stored frame
    ds_time = particles_grp.create_dataset('time', dtype="f", shape=(0,), maxshape=(None,), compression='gzip', shuffle=True)
    ds_step = particles_grp.create_dataset('step', dtype=np.uint64, shape=(0,), maxshape=(None,), compression='gzip', shuffle=True)
    # created lazily: its shape depends on the particle count of the first stored frame
    ds_value = None

    step = 0
    while True:
        header = f.readline()
        m = time_pattern.search(header)
        if m is None:
            # a line without "t=" (including the empty string at EOF) ends the conversion
            print("End Of File")
            break
        time = float(m.group(1))

        # get number of data rows, i.e., number of particles
        nparticles = int(f.readline())
        # read the data lines and keep the last three fields of each one
        arr = np.empty(shape=(nparticles, 3), dtype=np.float32)
        for row in range(nparticles):
            fields = parse(f.readline().encode('utf-8'))
            arr[row] = [float(field) for field in fields[3:6]]

        if nparticles > 0:
            # create the resizable dataset on the first non-empty frame;
            # compare against None explicitly rather than relying on the
            # truthiness of an h5py object
            if ds_value is None:
                ds_value = particles_grp.create_dataset('value', dtype=np.float32,
                                                        shape=(0, nparticles, 3), maxshape=(None, nparticles, 3),
                                                        chunks=(1, nparticles, 3), compression='gzip', shuffle=True)

            # append this sample to the datasets
            ds_time.resize(step + 1, axis=0)
            ds_step.resize(step + 1, axis=0)
            ds_value.resize(step + 1, axis=0)

            ds_time[step] = time
            ds_step[step] = step
            ds_value[step] = arr

        # trailing line of the frame (the box size, per the description above);
        # it is consumed to advance the file but not stored
        footer = f.readline()
        step += 1


        #=============================================================================

The code for reading the hdf5 file

# Read the file back and try to display the stored 'version' attribute.
with h5py.File('xaa.h5', 'r') as ff:
    # names of the objects directly under the file root
    base_items = list(ff.keys())
    print('Items in the base directory: ', base_items)
    # fetches the object stored at 'h5md/version'; the writer script created
    # that path only as an intermediate group, while the 'version' attribute
    # was set on the group 'h5md/version/author/creator'
    value = ff.get('h5md/version')
    #dataset = np.array(value)
    #print("The shape of the value", value.shape)
    # NOTE(review): attributes are normally read through `.attrs` (e.g.
    # obj.attrs['version']); `get_id` does not look like a Group method in
    # h5py -- confirm against the h5py documentation
    print(value.get_id('h5md/version/'))
    #print(list(ff.attrs.keys()))

kcw78 · Accepted Answer

You need to use the same group and attribute names as when you created them. Simple code to print the attribute based on your code:

# Open the file read-only and print the 'version' attribute from the exact
# group path that was used when the attribute was written.
with h5py.File('xaa.h5', 'r') as ff:
    creator_attrs = ff['h5md/version/author/creator'].attrs
    print(creator_attrs['version'])

Code to add the "file version" as a global attribute to the h5py file object then retrieve and print:

# Write: store the version as an attribute of the file object itself
# (the `....` lines are placeholders for the rest of the writer code).
with h5py.File('xaa.h5', 'w') as ff:
    ....
    ff.attrs['version'] = 1.0
    ....

# Read: reopen the file and print the attribute from the file object.
with h5py.File('xaa.h5', 'r') as ff:
    print(ff.attrs['version'])

How to create attributes to the groups and access them in hdf5 file system?

Answers (1)

Related Questions