mikhovr
mikhovr

Reputation: 11

Storing numpy arrays as PyTables cell element

I have 4 files with data in the following format: 3 files contain numpy arrays with different dimensions, say, 20, 30 and 25. The number of records in each file is the same, say 10000. The fourth file contains 10000 floats (as many as there are arrays in each file). I am trying to create a table based on these files with the following structure:

+-----------------------------------------------------------+
| VecsFile #0   | VecsFile #1   | VecsFile #2   | FloatFile |
+-----------------------------------------------------------+
|np.ndarray(20,)|np.ndarray(30,)|np.ndarray(25,)|   0.1     |
+-----------------------------------------------------------+
|np.ndarray(20,)|np.ndarray(30,)|np.ndarray(25,)|   0.2     |
                               ...

But I found that PyTables doesn't accept a numpy array as a valid type for cell data.

Code:

import tables
import numpy as np

def create_table_def(n_files):
    """Build the column-description dict passed to ``create_table``.

    The result has one entry per vector file, keyed ``'VecsFile #0'`` up to
    ``'VecsFile #<n_files - 1>'``, followed by a final ``'FloatFile'`` entry.
    Every entry is a column made from a default-constructed
    ``tables.Float64Atom``.
    """
    column_names = ['VecsFile #{}'.format(idx) for idx in range(n_files)]
    column_names.append('FloatFile')

    return {
        name: tables.Col.from_atom(tables.Float64Atom())
        for name in column_names
    }

# Load the three vector files; each one is indexed row by row in the loop below.
r0 = np.load('file0.npy')
r1 = np.load('file1.npy')
r2 = np.load('file2.npy')
# rand() receives every dimension of r0.shape, so s has the same shape as r0.
# NOTE(review): if r0 is 2-D, s[i] is a 1-D array rather than a single float —
# confirm whether one float per row was intended.
s = np.random.rand(*r0.shape)


# The context manager closes save.hdf when the block exits.
with tables.open_file('save.hdf', 'w') as saveFile:
    # Three 'VecsFile #k' columns plus 'FloatFile', all built from Float64Atom().
    table_def = create_table_def(3)
    table = saveFile.create_table(saveFile.root, 'que_vectors', table_def)
    # Row accessor: fields are assigned on it, then append() queues the row.
    tablerow = table.row
    for i in range(r0.shape[0]):
        print(r0[i])
        # The reported TypeError is raised by Row.__setitem__ on this assignment:
        # r0[i] is a numpy.ndarray, which the column as declared does not accept.
        tablerow['VecsFile #0'] = r0[i]
        tablerow['VecsFile #1'] = r1[i]
        tablerow['VecsFile #2'] = r2[i]
        tablerow['FloatFile'] = s[i]
        tablerow.append()
    # Flush the appended rows before the file is closed.
    table.flush()

And I get the following traceback:

    Traceback (most recent call last):
  File "C:/scratch_6.py", line 27, in <module>
    tablerow['VecsFile #0'] = r0[i]
  File "tables\tableextension.pyx", line 1591, in tables.tableextension.Row.__setitem__
TypeError: invalid type (<class 'numpy.ndarray'>) for column ``VecsFile #0``

Am I doing something wrong? Or is there a way to store such vectors and a column of floats in one file without appending all these vectors to a numpy matrix? In the future I want to use it for appending rows consisting of vectors and one float, ranking them, and deleting them.

Upvotes: 1

Views: 846

Answers (1)

Duane
Duane

Reputation: 5150

import numpy as np
import tables as tb


class NumpyTable(tb.IsDescription):
    """Table description with one column, ``numpy_cell``, holding 84 x 84 float32 arrays."""
    # shape=(84, 84) declares each cell of this column as a 2-D array.
    numpy_cell = tb.Float32Col(shape=(84, 84))


# Create the file and the table. The context manager closes the file even if
# one of the statements inside raises, so the handle is never leaked.
with tb.open_file('numpy_cell.h5', mode='w') as fileh:
    group = fileh.create_group(fileh.root, 'group')
    filters = tb.Filters(complevel=5, complib='zlib')
    np_table = fileh.create_table(group, 'numpy_table', NumpyTable,
                                  "group: NumpyTable", filters=filters)

    # Row accessor of the table: assign its fields, then append() the row.
    row = np_table.row

    # Add a row whose cell is all zeros.
    row['numpy_cell'] = np.zeros((84, 84), dtype=np.float32)
    row.append()

    # Add another row whose cell is all ones.
    row['numpy_cell'] = np.ones((84, 84), dtype=np.float32)
    row.append()

    # Write the appended rows to disk before the file is closed.
    np_table.flush()

# Re-open the file read-only and check that both rows round-trip unchanged.
with tb.open_file('numpy_cell.h5', mode='r') as fileh:
    assert np.allclose(
        fileh.root.group.numpy_table[0]['numpy_cell'],
        np.zeros((84, 84), dtype=np.float32),
    )
    assert np.allclose(
        fileh.root.group.numpy_table[1]['numpy_cell'],
        np.ones((84, 84), dtype=np.float32),
    )

Upvotes: 1

Related Questions