HDF5 Reader and Writer for Unstructured Grids
08 Jan 2015We are taking a quick break from the series of blogs on streaming. Instead,
in preparation for a discussion on block-based streaming, I will discuss how
you can write multi-block unstructured grid readers and writers in Python using
the h5py
library. Let's get right down business. Here is the writer code.
import vtk
import h5py
from vtk.util.vtkAlgorithm import VTKPythonAlgorithmBase
from vtk.numpy_interface import dataset_adapter as dsa
class HDF5Writer(VTKPythonAlgorithmBase):
def __init__(self):
VTKPythonAlgorithmBase.__init__(self,
nInputPorts=1, inputType='vtkUnstructuredGrid',
nOutputPorts=0)
self.__FileName = ""
self.__NumberOfPieces = 1
self.__CurrentPiece = 0
def RequestData(self, request, inInfo, outInfo):
info = inInfo[0].GetInformationObject(0)
inp = dsa.WrapDataObject(vtk.vtkDataSet.GetData(info))
if self.__CurrentPiece == 0:
self.__File = h5py.File(self.__FileName, 'w')
grp = self.__File.create_group("piece%d" % self.__CurrentPiece)
grp.attrs['bounds'] = inp.GetBounds()
grp.create_dataset("cells", data=inp.Cells)
grp.create_dataset("cell_types", data=inp.CellTypes)
grp.create_dataset("cell_locations", data=inp.CellLocations)
grp.create_dataset("points", data=inp.Points)
pdata = grp.create_group("point_data")
for name in inp.PointData.keys():
pdata.create_dataset(name, data=inp.PointData[name])
if self.__CurrentPiece < self.__NumberOfPieces - 1:
# If we are not done, ask the pipeline to re-execute us.
self.__CurrentPiece += 1
request.Set(
vtk.vtkStreamingDemandDrivenPipeline.CONTINUE_EXECUTING(),
1)
else:
# Stop execution
request.Remove(
vtk.vtkStreamingDemandDrivenPipeline.CONTINUE_EXECUTING())
self.__File.close()
del self.__File
return 1
def RequestInformation(self, request, inInfo, outInfo):
# Reset values.
self.__CurrentPiece = 0
return 1
def RequestUpdateExtent(self, request, inInfo, outInfo):
info = inInfo[0].GetInformationObject(0)
info.Set(
vtk.vtkStreamingDemandDrivenPipeline.UPDATE_NUMBER_OF_PIECES(),
self.__NumberOfPieces)
info.Set(
vtk.vtkStreamingDemandDrivenPipeline.UPDATE_PIECE_NUMBER(),
self.__CurrentPiece)
return 1
def SetFileName(self, fname):
if fname != self.__FileName:
self.Modified()
self.__FileName = fname
def GetFileName(self):
return self.__FileName
def SetNumberOfPieces(self, npieces):
if npieces != self.__NumberOfPieces:
self.Modified()
self.__NumberOfPieces = npieces
def GetNumberOfPieces(self):
return self.__NumberOfPieces
First of all, this writer uses streaming as described in a previous blog. See also this blog for a more detailed description of how streaming works. The meat of the writer is actually just a few lines:
grp = self.__File.create_group("piece%d" % self.__CurrentPiece)
grp.attrs['bounds'] = inp.GetBounds()
grp.create_dataset("cells", data=inp.Cells)
grp.create_dataset("cell_types", data=inp.CellTypes)
grp.create_dataset("cell_locations", data=inp.CellLocations)
pdata = grp.create_group("point_data")
for name in inp.PointData.keys():
pdata.create_dataset(name, data=inp.PointData[name])
This block of code writes the 3 data arrays specific to vtkUnstructuredGrid
s:
cells, cell types and cell locations. In short, cells
describes the connectivity
of cells (which points they contain), cell_types
describe the type of each cell
and cell_locations
stores the offset of each cell in the cells
array for quick
random access. I will not describe these in further detail here.
See the VTK Users' Guide for more information. I also added support for point arrays.
Writing out cell arrays is left to you as an exercise.
Note that, in addition to writing these arrays, I wrote the spatial bounds of each block as a meta-data (attribute). Why will become clear in the next blog (hint: think demand-driven pipeline and streaming).
We can exercise this writer with the following code:
s = vtk.vtkRTAnalyticSource()
c = vtk.vtkClipDataSet()
c.SetInputConnection(s.GetOutputPort())
c.SetValue(157)
w = HDF5Writer()
w.SetInputConnection(c.GetOutputPort())
w.SetFileName("test.h5")
w.SetNumberOfPieces(5)
w.Update()
This produces a file like this:
>>> h5ls -r test.h5
/ Group
/piece0 Group
/piece0/cell_locations Dataset {4778}
/piece0/cell_types Dataset {4778}
/piece0/cells Dataset {26534}
/piece0/point_data Group
/piece0/point_data/RTData Dataset {2402}
/piece0/points Dataset {2402, 3}
/piece1 Group
/piece1/cell_locations Dataset {4609}
/piece1/cell_types Dataset {4609}
/piece1/cells Dataset {25609}
/piece1/point_data Group
/piece1/point_data/RTData Dataset {2284}
/piece1/points Dataset {2284, 3}
/piece2 Group
/piece2/cell_locations Dataset {4173}
/piece2/cell_types Dataset {4173}
/piece2/cells Dataset {23265}
/piece2/point_data Group
/piece2/point_data/RTData Dataset {2156}
/piece2/points Dataset {2156, 3}
/piece3 Group
/piece3/cell_locations Dataset {6065}
/piece3/cell_types Dataset {6065}
/piece3/cells Dataset {33073}
/piece3/point_data Group
/piece3/point_data/RTData Dataset {2672}
/piece3/points Dataset {2672, 3}
/piece4 Group
/piece4/cell_locations Dataset {5971}
/piece4/cell_types Dataset {5971}
/piece4/cells Dataset {32407}
/piece4/point_data Group
/piece4/point_data/RTData Dataset {2574}
/piece4/points Dataset {2574, 3}
Here is a very simple reader for this data:
import vtk, h5py
from vtk.util.vtkAlgorithm import VTKPythonAlgorithmBase
from vtk.numpy_interface import dataset_adapter as dsa
class HDF5Reader(VTKPythonAlgorithmBase):
def __init__(self):
VTKPythonAlgorithmBase.__init__(self,
nInputPorts=0,
nOutputPorts=1, outputType='vtkMultiBlockDataSet')
self.__FileName = ""
def RequestData(self, request, inInfo, outInfo):
output = dsa.WrapDataObject(vtk.vtkMultiBlockDataSet.GetData(outInfo))
f = h5py.File(self.__FileName, 'r')
idx = 0
for grp_name in f:
ug = vtk.vtkUnstructuredGrid()
output.SetBlock(idx, ug)
idx += 1
ug = dsa.WrapDataObject(ug)
grp = f[grp_name]
cells = grp['cells'][:]
locations = grp['cell_locations'][:]
types = grp['cell_types'][:]
ug.SetCells(types, locations, cells)
pts = grp['points'][:]
ug.Points = pts
pt_arrays = grp['point_data']
for pt_array in pt_arrays:
array = pt_arrays[pt_array][:]
ug.PointData.append(array, pt_array)
return 1
def SetFileName(self, fname):
if fname != self.__FileName:
self.Modified()
self.__FileName = fname
def GetFileName(self):
return self.__FileName
This is for the most part self-explanatory (you may want to take a quick look at the h5py documentation). It is mostly a matter of mapping HDF5 groups and datasets to VTK data structures:
# Access the group containing the current block
grp = f[grp_name]
# Read unstructured grip topology
cells = grp['cells'][:]
locations = grp['cell_locations'][:]
types = grp['cell_types'][:]
# Set the topology data structures
ug.SetCells(types, locations, cells)
# Read and set the points
pts = grp['points'][:]
ug.Points = pts
# Read and set the point arrays
pt_arrays = grp['point_data']
for pt_array in pt_arrays:
array = pt_arrays[pt_array][:]
ug.PointData.append(array, pt_array)
In the next article, we will discover how we can extend this reader to support block-based streaming. Until then, happy coding.