xref: /petsc/src/vec/is/utils/hdf5/hdf5io.c (revision 84467f862f2de26368b63ea79dccd665bcda30cd)
1 #include <petsc/private/viewerhdf5impl.h>
2 #include <petsclayouthdf5.h> /*I   "petsclayoutdf5.h"   I*/
3 #include <petscis.h>         /*I   "petscis.h"   I*/
4 
5 #if defined(PETSC_HAVE_HDF5)
6 
7 struct _n_HDF5ReadCtx {
8   const char *name;
9   hid_t       file, group, dataset, dataspace;
10   int         lenInd, bsInd, complexInd, rdim;
11   hsize_t    *dims;
12   PetscBool   complexVal, dim2;
13 
14   // Needed for compression
15   PetscInt  runs;
16   PetscInt *cind;
17 };
18 typedef struct _n_HDF5ReadCtx *HDF5ReadCtx;
19 
20 PetscErrorCode PetscViewerHDF5CheckTimestepping_Internal(PetscViewer viewer, const char name[])
21 {
22   PetscViewer_HDF5 *hdf5         = (PetscViewer_HDF5 *)viewer->data;
23   PetscBool         timestepping = PETSC_FALSE;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscViewerHDF5ReadAttribute(viewer, name, "timestepping", PETSC_BOOL, &hdf5->defTimestepping, &timestepping));
27   if (timestepping != hdf5->timestepping) {
28     char *group;
29 
30     PetscCall(PetscViewerHDF5GetGroup(viewer, NULL, &group));
31     SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Dataset %s/%s stored with timesteps? %s Timestepping pushed? %s", group, name, PetscBools[timestepping], PetscBools[hdf5->timestepping]);
32   }
33   PetscFunctionReturn(PETSC_SUCCESS);
34 }
35 
36 static PetscErrorCode PetscViewerHDF5ReadInitialize_Private(PetscViewer viewer, const char name[], HDF5ReadCtx *ctx)
37 {
38   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
39   HDF5ReadCtx       h    = NULL;
40 
41   PetscFunctionBegin;
42   PetscCall(PetscViewerHDF5CheckTimestepping_Internal(viewer, name));
43   PetscCall(PetscNew(&h));
44   h->name = name;
45   PetscCall(PetscViewerHDF5OpenGroup(viewer, NULL, &h->file, &h->group));
46   PetscCallHDF5Return(h->dataset, H5Dopen2, (h->group, name, H5P_DEFAULT));
47   PetscCallHDF5Return(h->dataspace, H5Dget_space, (h->dataset));
48   PetscCall(PetscViewerHDF5ReadAttribute(viewer, name, "complex", PETSC_BOOL, &h->complexVal, &h->complexVal));
49   if (!hdf5->horizontal) {
50     /* MATLAB stores column vectors horizontally */
51     PetscCall(PetscViewerHDF5HasAttribute(viewer, name, "MATLAB_class", &hdf5->horizontal));
52   }
53   h->runs = 0;
54   h->cind = NULL;
55   *ctx    = h;
56   PetscFunctionReturn(PETSC_SUCCESS);
57 }
58 
59 static PetscErrorCode PetscViewerHDF5ReadFinalize_Private(PetscViewer viewer, HDF5ReadCtx *ctx)
60 {
61   HDF5ReadCtx h;
62 
63   PetscFunctionBegin;
64   h = *ctx;
65   PetscCallHDF5(H5Gclose, (h->group));
66   PetscCallHDF5(H5Sclose, (h->dataspace));
67   PetscCallHDF5(H5Dclose, (h->dataset));
68   PetscCall(PetscFree((*ctx)->dims));
69   PetscCall(PetscFree((*ctx)->cind));
70   PetscCall(PetscFree(*ctx));
71   PetscFunctionReturn(PETSC_SUCCESS);
72 }
73 
74 // Need forward declaration because we have a cyclic call chain
75 static PetscErrorCode PetscViewerHDF5Load_Internal(PetscViewer, const char[], PetscBool, PetscLayout, hid_t, void **);
76 
77 static PetscErrorCode PetscViewerHDF5ReadSizes_Private(PetscViewer viewer, HDF5ReadCtx ctx, PetscBool uncompress, PetscBool setup, PetscLayout *map_)
78 {
79   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
80   PetscInt          bs, N;
81   PetscLayout       map;
82   PetscBool         compressed;
83 
84   PetscFunctionBegin;
85   if (!*map_) PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)viewer), map_));
86   map = *map_;
87 
88   PetscCall(PetscViewerHDF5HasAttribute(viewer, ctx->name, "compressed", &compressed));
89   if (compressed && uncompress) {
90     hid_t           inttype;
91     PetscLayout     cmap;
92     PetscInt       *lcind, N = 0;
93     PetscMPIInt    *counts, *displs, size, n;
94     const PetscInt *range;
95     MPI_Comm        comm;
96 
97   #if defined(PETSC_USE_64BIT_INDICES)
98     inttype = H5T_NATIVE_LLONG;
99   #else
100     inttype = H5T_NATIVE_INT;
101   #endif
102     PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
103     PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)viewer), &cmap));
104     cmap->bs = 3;
105     PetscCall(PetscViewerHDF5Load_Internal(viewer, ctx->name, PETSC_FALSE, cmap, inttype, (void **)&lcind));
106     PetscCheck(!(cmap->n % 3), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Compressed IS must have an even number of entries, not %" PetscInt_FMT, cmap->n);
107     for (PetscInt i = 0; i < cmap->n / 3; ++i) N += lcind[i * 3 + 0];
108     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N, 1, MPIU_INT, MPIU_SUM, comm));
109     ctx->runs = cmap->N / 3;
110     PetscCall(PetscMalloc1(cmap->N, &ctx->cind));
111     PetscCallMPI(MPI_Comm_size(comm, &size));
112     PetscCall(PetscLayoutGetRanges(cmap, &range));
113     PetscCall(PetscMalloc2(size, &counts, size, &displs));
114     for (PetscInt r = 0; r < size; ++r) {
115       PetscCall(PetscMPIIntCast(range[r + 1] - range[r], &counts[r]));
116       PetscCall(PetscMPIIntCast(range[r], &displs[r]));
117     }
118     PetscCall(PetscMPIIntCast(cmap->n, &n));
119     PetscCallMPI(MPI_Allgatherv(lcind, n, MPIU_INT, ctx->cind, counts, displs, MPIU_INT, comm));
120     PetscCall(PetscFree2(counts, displs));
121     PetscCall(PetscFree(lcind));
122     PetscCall(PetscLayoutDestroy(&cmap));
123 
124     ctx->dim2   = PETSC_FALSE;
125     ctx->rdim   = 1;
126     ctx->lenInd = 0;
127     PetscCall(PetscMalloc1(ctx->rdim, &ctx->dims));
128     ctx->dims[0] = N;
129     bs           = 1;
130     goto layout;
131   }
132 
133   /* Get actual number of dimensions in dataset */
134   PetscCallHDF5Return(ctx->rdim, H5Sget_simple_extent_dims, (ctx->dataspace, NULL, NULL));
135   PetscCall(PetscMalloc1(ctx->rdim, &ctx->dims));
136   PetscCallHDF5Return(ctx->rdim, H5Sget_simple_extent_dims, (ctx->dataspace, ctx->dims, NULL));
137 
138   /*
139      Dimensions are in this order:
140      [0]        timesteps (optional)
141      [lenInd]   entries (numbers or blocks)
142      ...
143      [bsInd]    entries of blocks (optional)
144      [bsInd+1]  real & imaginary part (optional)
145       = rdim-1
146    */
147 
148   /* Get entries dimension index */
149   ctx->lenInd = 0;
150   if (hdf5->timestepping) ++ctx->lenInd;
151 
152   /* Get block dimension index */
153   if (ctx->complexVal) {
154     ctx->bsInd      = ctx->rdim - 2;
155     ctx->complexInd = ctx->rdim - 1;
156   } else {
157     ctx->bsInd      = ctx->rdim - 1;
158     ctx->complexInd = -1;
159   }
160   PetscCheck(ctx->lenInd <= ctx->bsInd, PetscObjectComm((PetscObject)viewer), PETSC_ERR_PLIB, "Calculated block dimension index = %d < %d = length dimension index.", ctx->bsInd, ctx->lenInd);
161   PetscCheck(ctx->bsInd <= ctx->rdim - 1, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Calculated block dimension index = %d > %d = total number of dimensions - 1.", ctx->bsInd, ctx->rdim - 1);
162   PetscCheck(!ctx->complexVal || ctx->dims[ctx->complexInd] == 2, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Complex numbers must have exactly 2 parts (%" PRIuHSIZE ")", ctx->dims[ctx->complexInd]);
163 
164   if (hdf5->horizontal) {
165     /* support horizontal 1D arrays (MATLAB vectors) - swap meaning of blocks and entries */
166     int t       = ctx->lenInd;
167     ctx->lenInd = ctx->bsInd;
168     ctx->bsInd  = t;
169   }
170 
171   /* Get block size */
172   ctx->dim2 = PETSC_FALSE;
173   if (ctx->lenInd == ctx->bsInd) {
174     bs = 1; /* support vectors stored as 1D array */
175   } else {
176     bs = (PetscInt)ctx->dims[ctx->bsInd];
177     if (bs == 1) ctx->dim2 = PETSC_TRUE; /* vector with blocksize of 1, still stored as 2D array */
178   }
179 
180 layout:
181   /* Get global size */
182   PetscCall(PetscIntCast(bs * ctx->dims[ctx->lenInd], &N));
183 
184   /* Set global size, blocksize and type if not yet set */
185   if (map->bs < 0) {
186     PetscCall(PetscLayoutSetBlockSize(map, bs));
187   } else PetscCheck(map->bs == bs, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Block size of array in file is %" PetscInt_FMT ", not %" PetscInt_FMT " as expected", bs, map->bs);
188   if (map->N < 0) {
189     PetscCall(PetscLayoutSetSize(map, N));
190   } else PetscCheck(map->N == N, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Global size of array %s in file is %" PetscInt_FMT ", not %" PetscInt_FMT " as expected", ctx->name, N, map->N);
191   if (setup) PetscCall(PetscLayoutSetUp(map));
192   PetscFunctionReturn(PETSC_SUCCESS);
193 }
194 
195 static PetscErrorCode PetscViewerHDF5ReadSelectHyperslab_Private(PetscViewer viewer, HDF5ReadCtx ctx, PetscLayout map, hid_t *memspace)
196 {
197   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
198   hsize_t          *count, *offset;
199   PetscInt          bs, n, low;
200   int               i;
201 
202   PetscFunctionBegin;
203   /* Compute local size and ownership range */
204   PetscCall(PetscLayoutSetUp(map));
205   PetscCall(PetscLayoutGetBlockSize(map, &bs));
206   PetscCall(PetscLayoutGetLocalSize(map, &n));
207   PetscCall(PetscLayoutGetRange(map, &low, NULL));
208 
209   /* Each process defines a dataset and reads it from the hyperslab in the file */
210   PetscCall(PetscMalloc2(ctx->rdim, &count, ctx->rdim, &offset));
211   for (i = 0; i < ctx->rdim; i++) {
212     /* By default, select all entries with no offset */
213     offset[i] = 0;
214     count[i]  = ctx->dims[i];
215   }
216   if (hdf5->timestepping) {
217     count[0]  = 1;
218     offset[0] = hdf5->timestep;
219   }
220   {
221     PetscCall(PetscHDF5IntCast(n / bs, &count[ctx->lenInd]));
222     PetscCall(PetscHDF5IntCast(low / bs, &offset[ctx->lenInd]));
223   }
224   PetscCallHDF5Return(*memspace, H5Screate_simple, (ctx->rdim, count, NULL));
225   PetscCallHDF5(H5Sselect_hyperslab, (ctx->dataspace, H5S_SELECT_SET, offset, NULL, count, NULL));
226   PetscCall(PetscFree2(count, offset));
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 static PetscErrorCode PetscViewerHDF5ReadArray_Private(PetscViewer viewer, HDF5ReadCtx h, hid_t datatype, hid_t memspace, void *arr)
231 {
232   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
233 
234   PetscFunctionBegin;
235   PetscCallHDF5(H5Dread, (h->dataset, datatype, memspace, h->dataspace, hdf5->dxpl_id, arr));
236   PetscFunctionReturn(PETSC_SUCCESS);
237 }
238 
239 static PetscErrorCode PetscViewerHDF5Load_Internal(PetscViewer viewer, const char name[], PetscBool uncompress, PetscLayout map, hid_t datatype, void **newarr)
240 {
241   PetscBool   has;
242   char       *group;
243   HDF5ReadCtx h        = NULL;
244   hid_t       memspace = 0;
245   size_t      unitsize;
246   void       *arr;
247 
248   PetscFunctionBegin;
249   PetscCall(PetscViewerHDF5GetGroup(viewer, NULL, &group));
250   PetscCall(PetscViewerHDF5HasDataset(viewer, name, &has));
251   PetscCheck(has, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Object (dataset) \"%s\" not stored in group %s", name, group);
252   PetscCall(PetscViewerHDF5ReadInitialize_Private(viewer, name, &h));
253   #if defined(PETSC_USE_COMPLEX)
254   if (!h->complexVal) {
255     H5T_class_t clazz = H5Tget_class(datatype);
256     PetscCheck(clazz != H5T_FLOAT, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as real but PETSc is configured for complex scalars. The conversion is not yet implemented. Configure with --with-scalar-type=real to read this dataset", group ? group : "", name);
257   }
258   #else
259   PetscCheck(!h->complexVal, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as complex but PETSc is configured for real scalars. Configure with --with-scalar-type=complex to read this dataset", group, name);
260   #endif
261 
262   PetscCall(PetscViewerHDF5ReadSizes_Private(viewer, h, uncompress, PETSC_TRUE, &map));
263   PetscCall(PetscViewerHDF5ReadSelectHyperslab_Private(viewer, h, map, &memspace));
264 
265   if (h->runs && uncompress) {
266     PetscInt *ind;
267 
268     PetscCall(PetscInfo(viewer, "Read compressed object with name %s of size %" PetscInt_FMT ":%" PetscInt_FMT "\n", name, map->n, map->N));
269     // Each process stores the whole compression, so skip any leading parts
270     PetscCall(PetscMalloc1(map->n, &ind));
271     for (PetscInt i = 0, off = 0; i < h->runs; ++i) {
272       for (PetscInt j = 0, inc = 0; j < h->cind[i * 3 + 0]; ++j, ++off, inc += h->cind[i * 3 + 1]) {
273         if (off >= map->rend) {
274           i = h->runs;
275           break;
276         }
277         if (off >= map->rstart) ind[off - map->rstart] = h->cind[i * 3 + 2] + inc;
278       }
279     }
280     *newarr = ind;
281     goto cleanup;
282   }
283 
284   unitsize = H5Tget_size(datatype);
285   if (h->complexVal) unitsize *= 2;
286   /* unitsize is size_t i.e. always unsigned, so the negative check is pointless? */
287   PetscCheck(unitsize > 0 && unitsize <= PetscMax(sizeof(PetscInt), sizeof(PetscScalar)), PETSC_COMM_SELF, PETSC_ERR_LIB, "Sanity check failed: HDF5 function H5Tget_size(datatype) returned suspicious value %zu", unitsize);
288   PetscCall(PetscMalloc(map->n * unitsize, &arr));
289 
290   PetscCall(PetscViewerHDF5ReadArray_Private(viewer, h, datatype, memspace, arr));
291   *newarr = arr;
292 
293 cleanup:
294   PetscCallHDF5(H5Sclose, (memspace));
295   PetscCall(PetscViewerHDF5ReadFinalize_Private(viewer, &h));
296   PetscCall(PetscFree(group));
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
300 /*@C
301   PetscViewerHDF5Load - Read a raw array from the `PETSCVIEWERHDF5` dataset in parallel
302 
303   Collective; No Fortran Support
304 
305   Input Parameters:
306 + viewer   - The `PETSCVIEWERHDF5` viewer
307 . name     - The dataset name
308 - datatype - The HDF5 datatype of the items in the dataset
309 
310   Input/Output Parameter:
311 . map - The layout which specifies array partitioning, on output the
312              set up layout (with global size and blocksize according to dataset)
313 
314   Output Parameter:
315 . newarr - The partitioned array, a memory image of the given dataset
316 
317   Level: developer
318 
319   Notes:
320   This is intended mainly for internal use; users should use higher level routines such as `ISLoad()`, `VecLoad()`, `DMLoad()`.
321 
322   The array is partitioned according to the given `PetscLayout` which is converted to an HDF5 hyperslab.
323 
324   This name is relative to the current group returned by `PetscViewerHDF5OpenGroup()`.
325 
326 .seealso: `PetscViewer`, `PETSCVIEWERHDF5`, `PetscViewerHDF5Open()`, `PetscViewerHDF5PushGroup()`, `PetscViewerHDF5OpenGroup()`, `PetscViewerHDF5ReadSizes()`,
327           `VecLoad()`, `ISLoad()`, `PetscLayout`
328 @*/
329 PetscErrorCode PetscViewerHDF5Load(PetscViewer viewer, const char name[], PetscLayout map, hid_t datatype, void **newarr)
330 {
331   PetscFunctionBegin;
332   PetscCall(PetscViewerHDF5Load_Internal(viewer, name, PETSC_TRUE, map, datatype, newarr));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 /*@C
337   PetscViewerHDF5ReadSizes - Read block size and global size of a `Vec` or `IS` stored in an HDF5 file.
338 
339   Input Parameters:
340 + viewer - The `PETSCVIEWERHDF5` viewer
341 - name   - The dataset name
342 
343   Output Parameters:
344 + bs - block size
345 - N  - global size
346 
347   Level: advanced
348 
349   Notes:
350   The dataset is stored as an HDF5 dataspace with 1-4 dimensions in the order
351   1) # timesteps (optional), 2) # blocks, 3) # elements per block (optional), 4) real and imaginary part (only for complex).
352 
353   The dataset can be stored as a 2D dataspace even if its blocksize is 1; see `PetscViewerHDF5SetBaseDimension2()`.
354 
355 .seealso: `PetscViewer`, `PETSCVIEWERHDF5`, `PetscViewerHDF5Open()`, `VecLoad()`, `ISLoad()`, `VecGetSize()`, `ISGetSize()`, `PetscViewerHDF5SetBaseDimension2()`
356 @*/
357 PetscErrorCode PetscViewerHDF5ReadSizes(PetscViewer viewer, const char name[], PetscInt *bs, PetscInt *N)
358 {
359   HDF5ReadCtx h   = NULL;
360   PetscLayout map = NULL;
361 
362   PetscFunctionBegin;
363   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 1);
364   PetscCall(PetscViewerHDF5ReadInitialize_Private(viewer, name, &h));
365   PetscCall(PetscViewerHDF5ReadSizes_Private(viewer, h, PETSC_TRUE, PETSC_FALSE, &map));
366   PetscCall(PetscViewerHDF5ReadFinalize_Private(viewer, &h));
367   if (bs) *bs = map->bs;
368   if (N) *N = map->N;
369   PetscCall(PetscLayoutDestroy(&map));
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
373 #endif /* defined(PETSC_HAVE_HDF5) */
374