Berk Geveci     About     Archive     Feed

HDF5 Reader and Writer for Unstructured Grids

We are taking a quick break from the series of blogs on streaming. Instead, in preparation for a discussion on block-based streaming, I will discuss how you can write multi-block unstructured grid readers and writers in Python using the h5py library. Let's get right down business. Here is the writer code.

import vtk
import h5py
from vtk.util.vtkAlgorithm import VTKPythonAlgorithmBase
from vtk.numpy_interface import dataset_adapter as dsa

class HDF5Writer(VTKPythonAlgorithmBase):
    def __init__(self):
        VTKPythonAlgorithmBase.__init__(self,
            nInputPorts=1, inputType='vtkUnstructuredGrid',
            nOutputPorts=0)

        self.__FileName = ""
        self.__NumberOfPieces = 1
        self.__CurrentPiece = 0

    def RequestData(self, request, inInfo, outInfo):
        info = inInfo[0].GetInformationObject(0)
        inp = dsa.WrapDataObject(vtk.vtkDataSet.GetData(info))

        if self.__CurrentPiece == 0:
              self.__File = h5py.File(self.__FileName, 'w')

        grp = self.__File.create_group("piece%d" % self.__CurrentPiece)
        grp.attrs['bounds'] = inp.GetBounds()

        grp.create_dataset("cells", data=inp.Cells)
        grp.create_dataset("cell_types", data=inp.CellTypes)
        grp.create_dataset("cell_locations", data=inp.CellLocations)

        grp.create_dataset("points", data=inp.Points)

        pdata = grp.create_group("point_data")
        for name in inp.PointData.keys():
            pdata.create_dataset(name, data=inp.PointData[name])

        if self.__CurrentPiece < self.__NumberOfPieces - 1:
            # If we are not done, ask the pipeline to re-execute us.
            self.__CurrentPiece += 1
            request.Set(
                vtk.vtkStreamingDemandDrivenPipeline.CONTINUE_EXECUTING(),
                1)
        else:
            # Stop execution
            request.Remove(
                vtk.vtkStreamingDemandDrivenPipeline.CONTINUE_EXECUTING())
            self.__File.close()
            del self.__File
        return 1

    def RequestInformation(self, request, inInfo, outInfo):
        # Reset values.
        self.__CurrentPiece = 0
        return 1

    def RequestUpdateExtent(self, request, inInfo, outInfo):
        info = inInfo[0].GetInformationObject(0)
        info.Set(
            vtk.vtkStreamingDemandDrivenPipeline.UPDATE_NUMBER_OF_PIECES(),
            self.__NumberOfPieces)
        info.Set(
            vtk.vtkStreamingDemandDrivenPipeline.UPDATE_PIECE_NUMBER(),
            self.__CurrentPiece)
        return 1

    def SetFileName(self, fname):
        if fname != self.__FileName:
            self.Modified()
            self.__FileName = fname

    def GetFileName(self):
        return self.__FileName

    def SetNumberOfPieces(self, npieces):
        if npieces != self.__NumberOfPieces:
            self.Modified()
            self.__NumberOfPieces = npieces

    def GetNumberOfPieces(self):
        return self.__NumberOfPieces

First of all, this writer uses streaming as described in a previous blog. See also this blog for a more detailed description of how streaming works. The meat of the writer is actually just a few lines:

grp = self.__File.create_group("piece%d" % self.__CurrentPiece)
grp.attrs['bounds'] = inp.GetBounds()

grp.create_dataset("cells", data=inp.Cells)
grp.create_dataset("cell_types", data=inp.CellTypes)
grp.create_dataset("cell_locations", data=inp.CellLocations)

pdata = grp.create_group("point_data")
for name in inp.PointData.keys():
    pdata.create_dataset(name, data=inp.PointData[name])

This block of code writes the 3 data arrays specific to vtkUnstructuredGrids: cells, cell types and cell locations. In short, cells describes the connectivity of cells (which points they contain), cell_types describe the type of each cell and cell_locations stores the offset of each cell in the cells array for quick random access. I will not describe these in further detail here. See the VTK Users' Guide for more information. I also added support for point arrays. Writing out cell arrays is left to you as an exercise.

Note that, in addition to writing these arrays, I wrote the spatial bounds of each block as a meta-data (attribute). Why will become clear in the next blog (hint: think demand-driven pipeline and streaming).

We can exercise this writer with the following code:

s = vtk.vtkRTAnalyticSource()

c = vtk.vtkClipDataSet()
c.SetInputConnection(s.GetOutputPort())
c.SetValue(157)

w = HDF5Writer()
w.SetInputConnection(c.GetOutputPort())
w.SetFileName("test.h5")
w.SetNumberOfPieces(5)

w.Update()

This produces a file like this:

>>> h5ls -r test.h5
/                        Group
/piece0                  Group
/piece0/cell_locations   Dataset {4778}
/piece0/cell_types       Dataset {4778}
/piece0/cells            Dataset {26534}
/piece0/point_data       Group
/piece0/point_data/RTData Dataset {2402}
/piece0/points           Dataset {2402, 3}
/piece1                  Group
/piece1/cell_locations   Dataset {4609}
/piece1/cell_types       Dataset {4609}
/piece1/cells            Dataset {25609}
/piece1/point_data       Group
/piece1/point_data/RTData Dataset {2284}
/piece1/points           Dataset {2284, 3}
/piece2                  Group
/piece2/cell_locations   Dataset {4173}
/piece2/cell_types       Dataset {4173}
/piece2/cells            Dataset {23265}
/piece2/point_data       Group
/piece2/point_data/RTData Dataset {2156}
/piece2/points           Dataset {2156, 3}
/piece3                  Group
/piece3/cell_locations   Dataset {6065}
/piece3/cell_types       Dataset {6065}
/piece3/cells            Dataset {33073}
/piece3/point_data       Group
/piece3/point_data/RTData Dataset {2672}
/piece3/points           Dataset {2672, 3}
/piece4                  Group
/piece4/cell_locations   Dataset {5971}
/piece4/cell_types       Dataset {5971}
/piece4/cells            Dataset {32407}
/piece4/point_data       Group
/piece4/point_data/RTData Dataset {2574}
/piece4/points           Dataset {2574, 3}

Here is a very simple reader for this data:

import vtk, h5py
from vtk.util.vtkAlgorithm import VTKPythonAlgorithmBase
from vtk.numpy_interface import dataset_adapter as dsa

class HDF5Reader(VTKPythonAlgorithmBase):
    def __init__(self):
        VTKPythonAlgorithmBase.__init__(self,
            nInputPorts=0,
            nOutputPorts=1, outputType='vtkMultiBlockDataSet')

        self.__FileName = ""

    def RequestData(self, request, inInfo, outInfo):
        output = dsa.WrapDataObject(vtk.vtkMultiBlockDataSet.GetData(outInfo))
        f = h5py.File(self.__FileName, 'r')
        idx = 0
        for grp_name in f:
            ug = vtk.vtkUnstructuredGrid()
            output.SetBlock(idx, ug)
            idx += 1
            ug = dsa.WrapDataObject(ug)
            grp = f[grp_name]
            cells = grp['cells'][:]
            locations = grp['cell_locations'][:]
            types = grp['cell_types'][:]
            ug.SetCells(types, locations, cells)
            pts = grp['points'][:]
            ug.Points = pts
            pt_arrays = grp['point_data']
            for pt_array in pt_arrays:
                array = pt_arrays[pt_array][:]
                ug.PointData.append(array, pt_array)

        return 1

    def SetFileName(self, fname):
        if fname != self.__FileName:
            self.Modified()
            self.__FileName = fname

    def GetFileName(self):
        return self.__FileName

This is for the most part self-explanatory (you may want to take a quick look at the h5py documentation). It is mostly a matter of mapping HDF5 groups and datasets to VTK data structures:

# Access the group containing the current block
grp = f[grp_name]
# Read unstructured grip topology
cells = grp['cells'][:]
locations = grp['cell_locations'][:]
types = grp['cell_types'][:]
# Set the topology data structures
ug.SetCells(types, locations, cells)
# Read and set the points
pts = grp['points'][:]
ug.Points = pts
# Read and set the point arrays
pt_arrays = grp['point_data']
for pt_array in pt_arrays:
    array = pt_arrays[pt_array][:]
    ug.PointData.append(array, pt_array)

In the next article, we will discover how we can extend this reader to support block-based streaming. Until then, happy coding.