xref: /petsc/src/vec/is/utils/hdf5/hdf5io.c (revision 6e415bd26dfd0d33b8d7233bdee8a51de4844a26)
1 #include <petsc/private/viewerhdf5impl.h>
2 #include <petsclayouthdf5.h> /*I   "petsclayouthdf5.h"   I*/
3 
4 struct _n_HDF5ReadCtx {
5   const char *name;
6   hid_t       file, group, dataset, dataspace;
7   int         lenInd, bsInd, complexInd, rdim;
8   hsize_t    *dims;
9   PetscBool   complexVal, dim2;
10 
11   // Needed for compression
12   PetscInt  runs;
13   PetscInt *cind;
14 };
15 typedef struct _n_HDF5ReadCtx *HDF5ReadCtx;
16 
17 PetscErrorCode PetscViewerHDF5CheckTimestepping_Internal(PetscViewer viewer, const char name[])
18 {
19   PetscViewer_HDF5 *hdf5         = (PetscViewer_HDF5 *)viewer->data;
20   PetscBool         timestepping = PETSC_FALSE;
21 
22   PetscFunctionBegin;
23   PetscCall(PetscViewerHDF5ReadAttribute(viewer, name, "timestepping", PETSC_BOOL, &hdf5->defTimestepping, &timestepping));
24   if (timestepping != hdf5->timestepping) {
25     char *group;
26 
27     PetscCall(PetscViewerHDF5GetGroup(viewer, NULL, &group));
28     SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Dataset %s/%s stored with timesteps? %s Timestepping pushed? %s", group, name, PetscBools[timestepping], PetscBools[hdf5->timestepping]);
29   }
30   PetscFunctionReturn(PETSC_SUCCESS);
31 }
32 
33 static PetscErrorCode PetscViewerHDF5ReadInitialize_Private(PetscViewer viewer, const char name[], HDF5ReadCtx *ctx)
34 {
35   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
36   HDF5ReadCtx       h    = NULL;
37 
38   PetscFunctionBegin;
39   PetscCall(PetscViewerHDF5CheckTimestepping_Internal(viewer, name));
40   PetscCall(PetscNew(&h));
41   h->name = name;
42   PetscCall(PetscViewerHDF5OpenGroup(viewer, NULL, &h->file, &h->group));
43   PetscCallHDF5Return(h->dataset, H5Dopen2, (h->group, name, H5P_DEFAULT));
44   PetscCallHDF5Return(h->dataspace, H5Dget_space, (h->dataset));
45   PetscCall(PetscViewerHDF5ReadAttribute(viewer, name, "complex", PETSC_BOOL, &h->complexVal, &h->complexVal));
46   if (!hdf5->horizontal) {
47     /* MATLAB stores column vectors horizontally */
48     PetscCall(PetscViewerHDF5HasAttribute(viewer, name, "MATLAB_class", &hdf5->horizontal));
49   }
50   h->runs = 0;
51   h->cind = NULL;
52   *ctx    = h;
53   PetscFunctionReturn(PETSC_SUCCESS);
54 }
55 
56 static PetscErrorCode PetscViewerHDF5ReadFinalize_Private(PetscViewer viewer, HDF5ReadCtx *ctx)
57 {
58   HDF5ReadCtx h;
59 
60   PetscFunctionBegin;
61   h = *ctx;
62   PetscCallHDF5(H5Gclose, (h->group));
63   PetscCallHDF5(H5Sclose, (h->dataspace));
64   PetscCallHDF5(H5Dclose, (h->dataset));
65   PetscCall(PetscFree((*ctx)->dims));
66   PetscCall(PetscFree((*ctx)->cind));
67   PetscCall(PetscFree(*ctx));
68   PetscFunctionReturn(PETSC_SUCCESS);
69 }
70 
71 // Need forward declaration because we have a cyclic call chain
72 static PetscErrorCode PetscViewerHDF5Load_Internal(PetscViewer, const char[], PetscBool, PetscLayout, hid_t, void **);
73 
74 static PetscErrorCode PetscViewerHDF5ReadSizes_Private(PetscViewer viewer, HDF5ReadCtx ctx, PetscBool uncompress, PetscBool setup, PetscLayout *map_)
75 {
76   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
77   PetscInt          bs, N;
78   PetscLayout       map;
79   PetscBool         compressed;
80 
81   PetscFunctionBegin;
82   if (!*map_) PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)viewer), map_));
83   map = *map_;
84 
85   PetscCall(PetscViewerHDF5HasAttribute(viewer, ctx->name, "compressed", &compressed));
86   if (compressed && uncompress) {
87     hid_t           inttype;
88     PetscLayout     cmap;
89     PetscInt       *lcind, N = 0;
90     PetscMPIInt    *counts, *displs, size, n;
91     const PetscInt *range;
92     MPI_Comm        comm;
93 
94 #if defined(PETSC_USE_64BIT_INDICES)
95     inttype = H5T_NATIVE_LLONG;
96 #else
97     inttype = H5T_NATIVE_INT;
98 #endif
99     PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
100     PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)viewer), &cmap));
101     cmap->bs = 3;
102     PetscCall(PetscViewerHDF5Load_Internal(viewer, ctx->name, PETSC_FALSE, cmap, inttype, (void **)&lcind));
103     PetscCheck(!(cmap->n % 3), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Compressed IS must have an even number of entries, not %" PetscInt_FMT, cmap->n);
104     for (PetscInt i = 0; i < cmap->n / 3; ++i) N += lcind[i * 3 + 0];
105     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N, 1, MPIU_INT, MPIU_SUM, comm));
106     ctx->runs = cmap->N / 3;
107     PetscCall(PetscMalloc1(cmap->N, &ctx->cind));
108     PetscCallMPI(MPI_Comm_size(comm, &size));
109     PetscCall(PetscLayoutGetRanges(cmap, &range));
110     PetscCall(PetscMalloc2(size, &counts, size, &displs));
111     for (PetscInt r = 0; r < size; ++r) {
112       PetscCall(PetscMPIIntCast(range[r + 1] - range[r], &counts[r]));
113       PetscCall(PetscMPIIntCast(range[r], &displs[r]));
114     }
115     PetscCall(PetscMPIIntCast(cmap->n, &n));
116     PetscCallMPI(MPI_Allgatherv(lcind, n, MPIU_INT, ctx->cind, counts, displs, MPIU_INT, comm));
117     PetscCall(PetscFree2(counts, displs));
118     PetscCall(PetscFree(lcind));
119     PetscCall(PetscLayoutDestroy(&cmap));
120 
121     ctx->dim2   = PETSC_FALSE;
122     ctx->rdim   = 1;
123     ctx->lenInd = 0;
124     PetscCall(PetscMalloc1(ctx->rdim, &ctx->dims));
125     ctx->dims[0] = N;
126     bs           = 1;
127     goto layout;
128   }
129 
130   /* Get actual number of dimensions in dataset */
131   PetscCallHDF5Return(ctx->rdim, H5Sget_simple_extent_dims, (ctx->dataspace, NULL, NULL));
132   PetscCall(PetscMalloc1(ctx->rdim, &ctx->dims));
133   PetscCallHDF5Return(ctx->rdim, H5Sget_simple_extent_dims, (ctx->dataspace, ctx->dims, NULL));
134 
135   /*
136      Dimensions are in this order:
137      [0]        timesteps (optional)
138      [lenInd]   entries (numbers or blocks)
139      ...
140      [bsInd]    entries of blocks (optional)
141      [bsInd+1]  real & imaginary part (optional)
142       = rdim-1
143    */
144 
145   /* Get entries dimension index */
146   ctx->lenInd = 0;
147   if (hdf5->timestepping) ++ctx->lenInd;
148 
149   /* Get block dimension index */
150   if (ctx->complexVal) {
151     ctx->bsInd      = ctx->rdim - 2;
152     ctx->complexInd = ctx->rdim - 1;
153   } else {
154     ctx->bsInd      = ctx->rdim - 1;
155     ctx->complexInd = -1;
156   }
157   PetscCheck(ctx->lenInd <= ctx->bsInd, PetscObjectComm((PetscObject)viewer), PETSC_ERR_PLIB, "Calculated block dimension index = %d < %d = length dimension index.", ctx->bsInd, ctx->lenInd);
158   PetscCheck(ctx->bsInd <= ctx->rdim - 1, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Calculated block dimension index = %d > %d = total number of dimensions - 1.", ctx->bsInd, ctx->rdim - 1);
159   PetscCheck(!ctx->complexVal || ctx->dims[ctx->complexInd] == 2, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Complex numbers must have exactly 2 parts (%" PRIuHSIZE ")", ctx->dims[ctx->complexInd]);
160 
161   if (hdf5->horizontal) {
162     /* support horizontal 1D arrays (MATLAB vectors) - swap meaning of blocks and entries */
163     int t       = ctx->lenInd;
164     ctx->lenInd = ctx->bsInd;
165     ctx->bsInd  = t;
166   }
167 
168   /* Get block size */
169   ctx->dim2 = PETSC_FALSE;
170   if (ctx->lenInd == ctx->bsInd) {
171     bs = 1; /* support vectors stored as 1D array */
172   } else {
173     bs = (PetscInt)ctx->dims[ctx->bsInd];
174     if (bs == 1) ctx->dim2 = PETSC_TRUE; /* vector with blocksize of 1, still stored as 2D array */
175   }
176 
177 layout:
178   /* Get global size */
179   PetscCall(PetscIntCast(bs * ctx->dims[ctx->lenInd], &N));
180 
181   /* Set global size, blocksize and type if not yet set */
182   if (map->bs < 0) {
183     PetscCall(PetscLayoutSetBlockSize(map, bs));
184   } else PetscCheck(map->bs == bs, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Block size of array in file is %" PetscInt_FMT ", not %" PetscInt_FMT " as expected", bs, map->bs);
185   if (map->N < 0) {
186     PetscCall(PetscLayoutSetSize(map, N));
187   } else PetscCheck(map->N == N, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Global size of array %s in file is %" PetscInt_FMT ", not %" PetscInt_FMT " as expected", ctx->name, N, map->N);
188   if (setup) PetscCall(PetscLayoutSetUp(map));
189   PetscFunctionReturn(PETSC_SUCCESS);
190 }
191 
192 static PetscErrorCode PetscViewerHDF5ReadSelectHyperslab_Private(PetscViewer viewer, HDF5ReadCtx ctx, PetscLayout map, hid_t *memspace)
193 {
194   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
195   hsize_t          *count, *offset;
196   PetscInt          bs, n, low;
197   int               i;
198 
199   PetscFunctionBegin;
200   /* Compute local size and ownership range */
201   PetscCall(PetscLayoutSetUp(map));
202   PetscCall(PetscLayoutGetBlockSize(map, &bs));
203   PetscCall(PetscLayoutGetLocalSize(map, &n));
204   PetscCall(PetscLayoutGetRange(map, &low, NULL));
205 
206   /* Each process defines a dataset and reads it from the hyperslab in the file */
207   PetscCall(PetscMalloc2(ctx->rdim, &count, ctx->rdim, &offset));
208   for (i = 0; i < ctx->rdim; i++) {
209     /* By default, select all entries with no offset */
210     offset[i] = 0;
211     count[i]  = ctx->dims[i];
212   }
213   if (hdf5->timestepping) {
214     count[0]  = 1;
215     offset[0] = hdf5->timestep;
216   }
217   {
218     PetscCall(PetscHDF5IntCast(n / bs, &count[ctx->lenInd]));
219     PetscCall(PetscHDF5IntCast(low / bs, &offset[ctx->lenInd]));
220   }
221   PetscCallHDF5Return(*memspace, H5Screate_simple, (ctx->rdim, count, NULL));
222   PetscCallHDF5(H5Sselect_hyperslab, (ctx->dataspace, H5S_SELECT_SET, offset, NULL, count, NULL));
223   PetscCall(PetscFree2(count, offset));
224   PetscFunctionReturn(PETSC_SUCCESS);
225 }
226 
227 static PetscErrorCode PetscViewerHDF5ReadArray_Private(PetscViewer viewer, HDF5ReadCtx h, hid_t datatype, hid_t memspace, void *arr)
228 {
229   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
230 
231   PetscFunctionBegin;
232   PetscCallHDF5(H5Dread, (h->dataset, datatype, memspace, h->dataspace, hdf5->dxpl_id, arr));
233   PetscFunctionReturn(PETSC_SUCCESS);
234 }
235 
236 static PetscErrorCode PetscViewerHDF5Load_Internal(PetscViewer viewer, const char name[], PetscBool uncompress, PetscLayout map, hid_t datatype, void **newarr)
237 {
238   PetscBool   has;
239   char       *group;
240   HDF5ReadCtx h        = NULL;
241   hid_t       memspace = 0;
242   size_t      unitsize;
243   void       *arr;
244 
245   PetscFunctionBegin;
246   PetscCall(PetscViewerHDF5GetGroup(viewer, NULL, &group));
247   PetscCall(PetscViewerHDF5HasDataset(viewer, name, &has));
248   PetscCheck(has, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Object (dataset) \"%s\" not stored in group %s", name, group);
249   PetscCall(PetscViewerHDF5ReadInitialize_Private(viewer, name, &h));
250 #if defined(PETSC_USE_COMPLEX)
251   if (!h->complexVal) {
252     H5T_class_t clazz = H5Tget_class(datatype);
253     PetscCheck(clazz != H5T_FLOAT, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as real but PETSc is configured for complex scalars. The conversion is not yet implemented. Configure with --with-scalar-type=real to read this dataset", group ? group : "", name);
254   }
255 #else
256   PetscCheck(!h->complexVal, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as complex but PETSc is configured for real scalars. Configure with --with-scalar-type=complex to read this dataset", group, name);
257 #endif
258 
259   PetscCall(PetscViewerHDF5ReadSizes_Private(viewer, h, uncompress, PETSC_TRUE, &map));
260   PetscCall(PetscViewerHDF5ReadSelectHyperslab_Private(viewer, h, map, &memspace));
261 
262   if (h->runs && uncompress) {
263     PetscInt *ind;
264 
265     PetscCall(PetscInfo(viewer, "Read compressed object with name %s of size %" PetscInt_FMT ":%" PetscInt_FMT "\n", name, map->n, map->N));
266     // Each process stores the whole compression, so skip any leading parts
267     PetscCall(PetscMalloc1(map->n, &ind));
268     for (PetscInt i = 0, off = 0; i < h->runs; ++i) {
269       for (PetscInt j = 0, inc = 0; j < h->cind[i * 3 + 0]; ++j, ++off, inc += h->cind[i * 3 + 1]) {
270         if (off >= map->rend) {
271           i = h->runs;
272           break;
273         }
274         if (off >= map->rstart) ind[off - map->rstart] = h->cind[i * 3 + 2] + inc;
275       }
276     }
277     *newarr = ind;
278     goto cleanup;
279   }
280 
281   unitsize = H5Tget_size(datatype);
282   if (h->complexVal) unitsize *= 2;
283   /* unitsize is size_t i.e. always unsigned, so the negative check is pointless? */
284   PetscCheck(unitsize > 0 && unitsize <= PetscMax(sizeof(PetscInt), sizeof(PetscScalar)), PETSC_COMM_SELF, PETSC_ERR_LIB, "Sanity check failed: HDF5 function H5Tget_size(datatype) returned suspicious value %zu", unitsize);
285   PetscCall(PetscMalloc(map->n * unitsize, &arr));
286 
287   PetscCall(PetscViewerHDF5ReadArray_Private(viewer, h, datatype, memspace, arr));
288   *newarr = arr;
289 
290 cleanup:
291   PetscCallHDF5(H5Sclose, (memspace));
292   PetscCall(PetscViewerHDF5ReadFinalize_Private(viewer, &h));
293   PetscCall(PetscFree(group));
294   PetscFunctionReturn(PETSC_SUCCESS);
295 }
296 
297 /*@C
298   PetscViewerHDF5Load - Read a raw array from the `PETSCVIEWERHDF5` dataset in parallel
299 
300   Collective; No Fortran Support
301 
302   Input Parameters:
303 + viewer   - The `PETSCVIEWERHDF5` viewer
304 . name     - The dataset name
305 - datatype - The HDF5 datatype of the items in the dataset
306 
307   Input/Output Parameter:
308 . map - The layout which specifies array partitioning, on output the
309              set up layout (with global size and blocksize according to dataset)
310 
311   Output Parameter:
312 . newarr - The partitioned array, a memory image of the given dataset
313 
314   Level: developer
315 
316   Notes:
317   This is intended mainly for internal use; users should use higher level routines such as `ISLoad()`, `VecLoad()`, `DMLoad()`.
318 
319   The array is partitioned according to the given `PetscLayout` which is converted to an HDF5 hyperslab.
320 
321   This name is relative to the current group returned by `PetscViewerHDF5OpenGroup()`.
322 
323 .seealso: `PetscViewer`, `PETSCVIEWERHDF5`, `PetscViewerHDF5Open()`, `PetscViewerHDF5PushGroup()`, `PetscViewerHDF5OpenGroup()`, `PetscViewerHDF5ReadSizes()`,
324           `VecLoad()`, `ISLoad()`, `PetscLayout`
325 @*/
326 PetscErrorCode PetscViewerHDF5Load(PetscViewer viewer, const char name[], PetscLayout map, hid_t datatype, void **newarr)
327 {
328   PetscFunctionBegin;
329   PetscCall(PetscViewerHDF5Load_Internal(viewer, name, PETSC_TRUE, map, datatype, newarr));
330   PetscFunctionReturn(PETSC_SUCCESS);
331 }
332 
333 /*@
334   PetscViewerHDF5ReadSizes - Read block size and global size of a `Vec` or `IS` stored in an HDF5 file.
335 
336   Input Parameters:
337 + viewer - The `PETSCVIEWERHDF5` viewer
338 - name   - The dataset name
339 
340   Output Parameters:
341 + bs - block size
342 - N  - global size
343 
344   Level: advanced
345 
346   Notes:
347   The dataset is stored as an HDF5 dataspace with 1-4 dimensions in the order
348   1) # timesteps (optional), 2) # blocks, 3) # elements per block (optional), 4) real and imaginary part (only for complex).
349 
350   The dataset can be stored as a 2D dataspace even if its blocksize is 1; see `PetscViewerHDF5SetBaseDimension2()`.
351 
352 .seealso: `PetscViewer`, `PETSCVIEWERHDF5`, `PetscViewerHDF5Open()`, `VecLoad()`, `ISLoad()`, `VecGetSize()`, `ISGetSize()`, `PetscViewerHDF5SetBaseDimension2()`
353 @*/
354 PetscErrorCode PetscViewerHDF5ReadSizes(PetscViewer viewer, const char name[], PetscInt *bs, PetscInt *N)
355 {
356   HDF5ReadCtx h   = NULL;
357   PetscLayout map = NULL;
358 
359   PetscFunctionBegin;
360   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 1);
361   PetscCall(PetscViewerHDF5ReadInitialize_Private(viewer, name, &h));
362   PetscCall(PetscViewerHDF5ReadSizes_Private(viewer, h, PETSC_TRUE, PETSC_FALSE, &map));
363   PetscCall(PetscViewerHDF5ReadFinalize_Private(viewer, &h));
364   if (bs) *bs = map->bs;
365   if (N) *N = map->N;
366   PetscCall(PetscLayoutDestroy(&map));
367   PetscFunctionReturn(PETSC_SUCCESS);
368 }
369