1 /*
2 Code for some of the parallel vector primitives.
3 */
4 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
5 #include <petsc/private/viewerhdf5impl.h>
6 #include <petsc/private/glvisviewerimpl.h>
7 #include <petsc/private/glvisvecimpl.h>
8 #include <petscsf.h>
9
/* Free all COO (coordinate-format assembly) preallocation state attached to the
   MPI vector, so VecSetPreallocationCOO() can be called again. Safe to call when
   no COO data was ever set (all pointers are then NULL and the frees are no-ops). */
static PetscErrorCode VecResetPreallocationCOO_MPI(Vec v)
{
  Vec_MPI *vmpi = (Vec_MPI *)v->data;

  PetscFunctionBegin;
  if (vmpi) {
    PetscCall(PetscFree(vmpi->jmap1));
    PetscCall(PetscFree(vmpi->perm1));
    PetscCall(PetscFree(vmpi->Cperm));
    /* imap2/jmap2/sendbuf/recvbuf were obtained with a single PetscMalloc4(),
       so they must be released together with PetscFree4() */
    PetscCall(PetscFree4(vmpi->imap2, vmpi->jmap2, vmpi->sendbuf, vmpi->recvbuf));
    PetscCall(PetscFree(vmpi->perm2));
    PetscCall(PetscSFDestroy(&vmpi->coo_sf));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
25
/* Destroy the MPI-specific data of a parallel vector: the local array, any ghost
   (local-representation) objects, the assembly stashes, and COO state. The Vec
   header itself is destroyed by the caller (VecDestroy). */
PetscErrorCode VecDestroy_MPI(Vec v)
{
  Vec_MPI *x = (Vec_MPI *)v->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->N));
  /* nothing to do if the implementation data was never created */
  if (!x) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(x->array_allocated));

  /* Destroy local representation of vector if it exists */
  if (x->localrep) {
    PetscCall(VecDestroy(&x->localrep));
    PetscCall(VecScatterDestroy(&x->localupdate));
    PetscCall(ISDestroy(&x->ghost));
  }
  PetscCall(VecAssemblyReset_MPI(v));

  /* Destroy the stashes: note the order - so that the tags are freed properly */
  PetscCall(VecStashDestroy_Private(&v->bstash));
  PetscCall(VecStashDestroy_Private(&v->stash));

  PetscCall(VecResetPreallocationCOO_MPI(v));
  /* remove composed MATLAB hooks so no dangling function pointers remain */
  PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", NULL));
  PetscCall(PetscFree(v->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
53
/* ASCII viewer for parallel vectors. Rank 0 prints its own values, then receives
   each other rank's values in rank order and prints them; all other ranks send
   their local array to rank 0. Several output formats (MATLAB, SYMMODU, PCICE,
   GLVis, INDEX, COMMON, LOAD_BALANCE, INFO) are handled in one function so that
   the send/receive pattern stays collective and consistent across formats. */
static PetscErrorCode VecView_MPI_ASCII(Vec xin, PetscViewer viewer)
{
  PetscInt           i, work = xin->map->n, cnt, nLen;
  PetscMPIInt        j, n = 0, size, rank, tag = ((PetscObject)viewer)->tag, len;
  MPI_Status         status;
  PetscScalar       *values;
  const PetscScalar *xarray;
  const char        *name;
  PetscViewerFormat  format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_LOAD_BALANCE) {
    /* only report min/avg/max local sizes; no values are printed or communicated */
    PetscInt nmax = 0, nmin = xin->map->n, navg;
    for (PetscMPIInt i = 0; i < size; i++) {
      nmax = PetscMax(nmax, xin->map->range[i + 1] - xin->map->range[i]);
      nmin = PetscMin(nmin, xin->map->range[i + 1] - xin->map->range[i]);
    }
    navg = xin->map->N / size;
    PetscCall(PetscViewerASCIIPrintf(viewer, "  Load Balance - Local vector size Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(VecGetArrayRead(xin, &xarray));
  /* determine maximum message to arrive */
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
  /* reduce the max local length onto rank 0 so it can size the receive buffer once */
  PetscCallMPI(MPI_Reduce(rank ? &work : MPI_IN_PLACE, &work, 1, MPIU_INT, MPI_MAX, 0, PetscObjectComm((PetscObject)xin)));
  PetscCall(PetscMPIIntCast(work, &len));
  if (format == PETSC_VIEWER_ASCII_GLVIS) rank = 0, len = 0; /* no parallel distributed write support from GLVis */
  if (rank == 0) {
    PetscCall(PetscMalloc1(len, &values));
    /*
        MATLAB format and ASCII format are very similar except
        MATLAB uses %18.16e format while ASCII uses %g
    */
    if (format == PETSC_VIEWER_ASCII_MATLAB) {
      PetscCall(PetscObjectGetName((PetscObject)xin, &name));
      PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name));
      /* rank 0's own values first */
      for (i = 0; i < xin->map->n; i++) {
#if defined(PETSC_USE_COMPLEX)
        if (PetscImaginaryPart(xarray[i]) > 0.0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
        } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
        } else {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(xarray[i])));
        }
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
#endif
      }
      /* receive and print messages */
      for (j = 1; j < size; j++) {
        PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
        /* actual count may be smaller than len; query it from the status */
        PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
        for (i = 0; i < n; i++) {
#if defined(PETSC_USE_COMPLEX)
          if (PetscImaginaryPart(values[i]) > 0.0) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
          } else if (PetscImaginaryPart(values[i]) < 0.0) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
          } else {
            PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(values[i])));
          }
#else
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
#endif
        }
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "];\n"));

    } else if (format == PETSC_VIEWER_ASCII_SYMMODU) {
      /* SYMMODU: real and imaginary parts as two columns, no header/footer */
      for (i = 0; i < xin->map->n; i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
#endif
      }
      /* receive and print messages */
      for (j = 1; j < size; j++) {
        PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
        PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
        for (i = 0; i < n; i++) {
#if defined(PETSC_USE_COMPLEX)
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
#else
          PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
#endif
        }
      }
    } else if (format == PETSC_VIEWER_ASCII_PCICE) {
      /* PCICE: one row per block (vertex), values grouped by block size, real only */
      PetscInt bs, b, vertexCount = 1;

      PetscCall(VecGetLocalSize(xin, &nLen));
      PetscCall(PetscMPIIntCast(nLen, &n));
      PetscCall(VecGetBlockSize(xin, &bs));
      PetscCheck(bs >= 1 && bs <= 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PCICE can only handle up to 3D objects, but vector dimension is %" PetscInt_FMT, bs);

      PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", xin->map->N / bs));
      for (i = 0; i < n / bs; i++) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT "   ", vertexCount++));
        for (b = 0; b < bs; b++) {
          if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
#if !defined(PETSC_USE_COMPLEX)
          PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)xarray[i * bs + b]));
#endif
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
      }
      for (j = 1; j < size; j++) {
        PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
        PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
        for (i = 0; i < n / bs; i++) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT "   ", vertexCount++));
          for (b = 0; b < bs; b++) {
            if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
#if !defined(PETSC_USE_COMPLEX)
            PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)values[i * bs + b]));
#endif
          }
          PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
        }
      }
    } else if (format == PETSC_VIEWER_ASCII_GLVIS) {
      /* GLVis ASCII visualization/dump: this function mimics mfem::GridFunction::Save() */
      /* NOTE: with GLVis every rank took this branch (rank was forced to 0 above),
         so all output here is per-rank local and no messages are exchanged */
      const PetscScalar      *array;
      PetscInt                i, n, vdim, ordering = 1; /* mfem::FiniteElementSpace::Ordering::byVDIM */
      PetscContainer          glvis_container;
      PetscViewerGLVisVecInfo glvis_vec_info;
      PetscViewerGLVisInfo    glvis_info;

      /* mfem::FiniteElementSpace::Save() */
      PetscCall(VecGetBlockSize(xin, &vdim));
      PetscCall(PetscViewerASCIIPrintf(viewer, "FiniteElementSpace\n"));
      PetscCall(PetscObjectQuery((PetscObject)xin, "_glvis_info_container", (PetscObject *)&glvis_container));
      PetscCheck(glvis_container, PetscObjectComm((PetscObject)xin), PETSC_ERR_PLIB, "Missing GLVis container");
      PetscCall(PetscContainerGetPointer(glvis_container, &glvis_vec_info));
      PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", glvis_vec_info->fec_type));
      PetscCall(PetscViewerASCIIPrintf(viewer, "VDim: %" PetscInt_FMT "\n", vdim));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Ordering: %" PetscInt_FMT "\n", ordering));
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
      /* mfem::Vector::Print() */
      PetscCall(PetscObjectQuery((PetscObject)viewer, "_glvis_info_container", (PetscObject *)&glvis_container));
      PetscCheck(glvis_container, PetscObjectComm((PetscObject)viewer), PETSC_ERR_PLIB, "Missing GLVis container");
      PetscCall(PetscContainerGetPointer(glvis_container, &glvis_info));
      if (glvis_info->enabled) {
        PetscCall(VecGetLocalSize(xin, &n));
        PetscCall(VecGetArrayRead(xin, &array));
        for (i = 0; i < n; i++) {
          PetscCall(PetscViewerASCIIPrintf(viewer, glvis_info->fmt, (double)PetscRealPart(array[i])));
          PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
        }
        PetscCall(VecRestoreArrayRead(xin, &array));
      }
    } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* No info */
    } else {
      /* default/COMMON/INDEX formats: %g output, optionally with per-process headers
         and a running global index */
      if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
      cnt = 0;
      for (i = 0; i < xin->map->n; i++) {
        if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
#if defined(PETSC_USE_COMPLEX)
        if (PetscImaginaryPart(xarray[i]) > 0.0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
        } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
        } else {
          PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(xarray[i])));
        }
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)xarray[i]));
#endif
      }
      /* receive and print messages */
      for (j = 1; j < size; j++) {
        PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
        PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
        if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", j));
        for (i = 0; i < n; i++) {
          if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
#if defined(PETSC_USE_COMPLEX)
          if (PetscImaginaryPart(values[i]) > 0.0) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
          } else if (PetscImaginaryPart(values[i]) < 0.0) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
          } else {
            PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(values[i])));
          }
#else
          PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)values[i]));
#endif
        }
      }
    }
    PetscCall(PetscFree(values));
  } else {
    /* non-root ranks: send local data to rank 0 (except for formats where rank 0
       is not receiving) */
    if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Rank 0 is not trying to receive anything, so don't send anything */
    } else {
      if (format == PETSC_VIEWER_ASCII_MATLAB) {
        /* this may be a collective operation so make sure everyone calls it */
        PetscCall(PetscObjectGetName((PetscObject)xin, &name));
      }
      PetscCallMPI(MPIU_Send((void *)xarray, xin->map->n, MPIU_SCALAR, 0, tag, PetscObjectComm((PetscObject)xin)));
    }
  }
  PetscCall(PetscViewerFlush(viewer));
  PetscCall(VecRestoreArrayRead(xin, &xarray));
  PetscFunctionReturn(PETSC_SUCCESS);
}
266
VecView_MPI_Binary(Vec xin,PetscViewer viewer)267 PetscErrorCode VecView_MPI_Binary(Vec xin, PetscViewer viewer)
268 {
269 return VecView_Binary(xin, viewer);
270 }
271
272 #include <petscdraw.h>
/* Line-graph viewer: gather the (real parts of the) entire vector onto rank 0
   with MPI_Gatherv and plot entry value against global index as a single curve.
   Collective on the vector's communicator; only rank 0 owns the gathered data. */
PetscErrorCode VecView_MPI_Draw_LG(Vec xin, PetscViewer viewer)
{
  PetscDraw          draw;
  PetscBool          isnull;
  PetscDrawLG        lg;
  PetscMPIInt        i, size, rank, n, N, *lens = NULL, *disp = NULL;
  PetscReal         *values, *xx = NULL, *yy = NULL;
  const PetscScalar *xarray;
  int                colors[] = {PETSC_DRAW_RED};

  PetscFunctionBegin;
  PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
  PetscCall(PetscDrawIsNull(draw, &isnull));
  if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
  PetscCall(PetscMPIIntCast(xin->map->n, &n));
  PetscCall(PetscMPIIntCast(xin->map->N, &N));

  PetscCall(VecGetArrayRead(xin, &xarray));
#if defined(PETSC_USE_COMPLEX)
  /* Gatherv works on MPIU_REAL, so copy out the real parts first */
  PetscCall(PetscMalloc1(n + 1, &values));
  for (i = 0; i < n; i++) values[i] = PetscRealPart(xarray[i]);
#else
  /* real build: the array can be sent in place, no copy needed */
  values = (PetscReal *)xarray;
#endif
  if (rank == 0) {
    /* x-coordinates are the global indices; counts/displacements come from the layout */
    PetscCall(PetscMalloc2(N, &xx, N, &yy));
    for (i = 0; i < N; i++) xx[i] = (PetscReal)i;
    PetscCall(PetscMalloc2(size, &lens, size, &disp));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i + 1] - xin->map->range[i], &lens[i]));
    for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i], &disp[i]));
  }
  /* lens/disp stay NULL on non-root ranks; MPI ignores them there */
  PetscCallMPI(MPI_Gatherv(values, n, MPIU_REAL, yy, lens, disp, MPIU_REAL, 0, PetscObjectComm((PetscObject)xin)));
  PetscCall(PetscFree2(lens, disp));
#if defined(PETSC_USE_COMPLEX)
  PetscCall(PetscFree(values));
#endif
  PetscCall(VecRestoreArrayRead(xin, &xarray));

  PetscCall(PetscViewerDrawGetDrawLG(viewer, 0, &lg));
  PetscCall(PetscDrawLGReset(lg));
  PetscCall(PetscDrawLGSetDimension(lg, 1));
  PetscCall(PetscDrawLGSetColors(lg, colors));
  if (rank == 0) {
    PetscCall(PetscDrawLGAddPoints(lg, N, &xx, &yy));
    PetscCall(PetscFree2(xx, yy));
  }
  /* draw/save are collective even though only rank 0 added points */
  PetscCall(PetscDrawLGDraw(lg));
  PetscCall(PetscDrawLGSave(lg));
  PetscFunctionReturn(PETSC_SUCCESS);
}
325
/* Draw the vector as a connected line plot without gathering it: each rank draws
   its own segment. To connect segments across rank boundaries, each rank sends
   its last value to the rank on its right and receives the last value of the
   rank on its left (a one-step pipeline). */
PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec xin, PetscViewer viewer)
{
  PetscMPIInt        rank, size, tag = ((PetscObject)viewer)->tag;
  PetscInt           i, start, end;
  MPI_Status         status;
  PetscReal          min, max, tmp = 0.0;
  PetscDraw          draw;
  PetscBool          isnull;
  PetscDrawAxis      axis;
  const PetscScalar *xarray;

  PetscFunctionBegin;
  PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
  PetscCall(PetscDrawIsNull(draw, &isnull));
  if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));

  /* global min/max set the y-axis limits (collective) */
  PetscCall(VecMin(xin, NULL, &min));
  PetscCall(VecMax(xin, NULL, &max));
  if (min == max) {
    /* avoid a degenerate zero-height axis for constant vectors */
    min -= 1.e-5;
    max += 1.e-5;
  }

  PetscCall(PetscDrawCheckResizedWindow(draw));
  PetscCall(PetscDrawClear(draw));

  PetscCall(PetscDrawAxisCreate(draw, &axis));
  PetscCall(PetscDrawAxisSetLimits(axis, 0.0, (PetscReal)xin->map->N, min, max));
  PetscCall(PetscDrawAxisDraw(axis));
  PetscCall(PetscDrawAxisDestroy(&axis));

  /* draw local part of vector */
  PetscCall(VecGetArrayRead(xin, &xarray));
  PetscCall(VecGetOwnershipRange(xin, &start, &end));
  if (rank < size - 1) { /* send last local value to the right neighbor */
    PetscCallMPI(MPI_Send((void *)&xarray[xin->map->n - 1], 1, MPIU_REAL, rank + 1, tag, PetscObjectComm((PetscObject)xin)));
  }
  if (rank) { /* receive last value of the left neighbor (rank-1) to connect the curves */
    PetscCallMPI(MPI_Recv(&tmp, 1, MPIU_REAL, rank - 1, tag, PetscObjectComm((PetscObject)xin), &status));
  }
  PetscDrawCollectiveBegin(draw);
  /* segment connecting this rank's first value to the neighbor's last value */
  if (rank) PetscCall(PetscDrawLine(draw, (PetscReal)start - 1, tmp, (PetscReal)start, PetscRealPart(xarray[0]), PETSC_DRAW_RED));
  for (i = 1; i < xin->map->n; i++) PetscCall(PetscDrawLine(draw, (PetscReal)(i - 1 + start), PetscRealPart(xarray[i - 1]), (PetscReal)(i + start), PetscRealPart(xarray[i]), PETSC_DRAW_RED));
  PetscDrawCollectiveEnd(draw);
  PetscCall(VecRestoreArrayRead(xin, &xarray));

  PetscCall(PetscDrawFlush(draw));
  PetscCall(PetscDrawPause(draw));
  PetscCall(PetscDrawSave(draw));
  PetscFunctionReturn(PETSC_SUCCESS);
}
379
380 #if defined(PETSC_HAVE_MATLAB)
/* MATLAB-engine viewer: gather the full vector onto rank 0 and hand it to the
   MATLAB viewer under the vector's object name. Only built when PETSc is
   configured with MATLAB support. */
PetscErrorCode VecView_MPI_Matlab(Vec xin, PetscViewer viewer)
{
  PetscMPIInt        rank, size, *lens;
  PetscInt           i, N = xin->map->N;
  const PetscScalar *xarray;
  PetscScalar       *xx;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xin, &xarray));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
  if (rank == 0) {
    PetscCall(PetscMalloc1(N, &xx));
    PetscCall(PetscMalloc1(size, &lens));
    /* per-rank receive counts from the layout's ownership ranges */
    for (i = 0; i < size; i++) lens[i] = xin->map->range[i + 1] - xin->map->range[i];

    /* NOTE(review): xin->map->range (PetscInt*) is passed as the displs argument,
       which MPI declares as int* — looks unsafe when PetscInt is 64-bit; verify
       against the MPI_Gatherv prototype / MPIU handling */
    PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, xx, lens, xin->map->range, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
    PetscCall(PetscFree(lens));

    /* make sure the object has a name to register the array under */
    PetscCall(PetscObjectName((PetscObject)xin));
    PetscCall(PetscViewerMatlabPutArray(viewer, N, 1, xx, ((PetscObject)xin)->name));

    PetscCall(PetscFree(xx));
  } else {
    /* non-root ranks only contribute their send buffer */
    PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, 0, 0, 0, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
  }
  PetscCall(VecRestoreArrayRead(xin, &xarray));
  PetscFunctionReturn(PETSC_SUCCESS);
}
410 #endif
411
412 #if defined(PETSC_HAVE_ADIOS)
413 #include <adios.h>
414 #include <adios_read.h>
415 #include <petsc/private/vieweradiosimpl.h>
416 #include <petsc/private/viewerimpl.h>
417
/* ADIOS viewer: write the vector as a 1-D distributed ADIOS variable. Each rank
   declares its local slab (local size, global size, global offset encoded as
   strings, as the ADIOS API requires) and writes its local array. */
PetscErrorCode VecView_MPI_ADIOS(Vec xin, PetscViewer viewer)
{
  PetscViewer_ADIOS *adios = (PetscViewer_ADIOS *)viewer->data;
  const char        *vecname;
  int64_t            id;
  PetscInt           n, N, rstart;
  const PetscScalar *array;
  char               nglobalname[16], nlocalname[16], coffset[16];

  PetscFunctionBegin;
  PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));

  PetscCall(VecGetLocalSize(xin, &n));
  PetscCall(VecGetSize(xin, &N));
  PetscCall(VecGetOwnershipRange(xin, &rstart, NULL));

  /* ADIOS takes dimensions/offsets as strings */
  PetscCall(PetscSNPrintf(nlocalname, PETSC_STATIC_ARRAY_LENGTH(nlocalname), "%" PetscInt_FMT, n));
  PetscCall(PetscSNPrintf(nglobalname, PETSC_STATIC_ARRAY_LENGTH(nglobalname), "%" PetscInt_FMT, N));
  PetscCall(PetscSNPrintf(coffset, PETSC_STATIC_ARRAY_LENGTH(coffset), "%" PetscInt_FMT, rstart));
  id = adios_define_var(Petsc_adios_group, vecname, "", adios_double, nlocalname, nglobalname, coffset);
  PetscCall(VecGetArrayRead(xin, &array));
  PetscCallExternal(adios_write_byid, adios->adios_handle, id, array);
  PetscCall(VecRestoreArrayRead(xin, &array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
443 #endif
444
445 #if defined(PETSC_HAVE_HDF5)
/* HDF5 viewer: collectively write the vector as an (optionally timestepped,
   optionally blocked, optionally complex) dataset. Dataset layout, fastest to
   slowest varying: [timestep][N/bs][bs][2-for-complex]; each rank writes its
   own hyperslab of rows. The dataset is created chunked so new timesteps can be
   appended by extending the first dimension. */
PetscErrorCode VecView_MPI_HDF5(Vec xin, PetscViewer viewer)
{
  PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
  /* TODO: It looks like we can remove the H5Sclose(filespace) and H5Dget_space(dset_id). Why do we do this? */
  hid_t              filespace;  /* file dataspace identifier */
  hid_t              chunkspace; /* chunk dataset property identifier */
  hid_t              dset_id;    /* dataset identifier */
  hid_t              memspace;   /* memory dataspace identifier */
  hid_t              file_id;
  hid_t              group;
  hid_t              memscalartype;  /* scalar type for mem (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
  hid_t              filescalartype; /* scalar type for file (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
  PetscInt           bs = xin->map->bs;
  hsize_t            dim;
  hsize_t            maxDims[4], dims[4], chunkDims[4], count[4], offset[4];
  PetscBool          timestepping, dim2, spoutput;
  PetscInt           timestep = PETSC_INT_MIN, low;
  hsize_t            chunksize;
  const PetscScalar *x;
  const char        *vecname;
  size_t             len;

  PetscFunctionBegin;
  PetscCall(PetscViewerHDF5OpenGroup(viewer, NULL, &file_id, &group));
  PetscCall(PetscViewerHDF5IsTimestepping(viewer, &timestepping));
  if (timestepping) PetscCall(PetscViewerHDF5GetTimestep(viewer, &timestep));
  PetscCall(PetscViewerHDF5GetBaseDimension2(viewer, &dim2));
  PetscCall(PetscViewerHDF5GetSPOutput(viewer, &spoutput));

  /* Create the dataspace for the dataset.
   *
   * dims - holds the current dimensions of the dataset
   *
   * maxDims - holds the maximum dimensions of the dataset (unlimited
   * for the number of time steps with the current dimensions for the
   * other dimensions; so only additional time steps can be added).
   *
   * chunkDims - holds the size of a single time step (required to
   * permit extending dataset).
   */
  dim       = 0;
  chunksize = 1;
  if (timestep >= 0) {
    /* leading unlimited timestep dimension */
    dims[dim]      = timestep + 1;
    maxDims[dim]   = H5S_UNLIMITED;
    chunkDims[dim] = 1;
    ++dim;
  }
  /* main data dimension: number of blocks */
  PetscCall(PetscHDF5IntCast(xin->map->N / bs, dims + dim));

  maxDims[dim]   = dims[dim];
  chunkDims[dim] = PetscMax(1, dims[dim]);
  chunksize *= chunkDims[dim];
  ++dim;
  if (bs > 1 || dim2) {
    /* extra dimension for the block components (or forced 2D base) */
    dims[dim]      = bs;
    maxDims[dim]   = dims[dim];
    chunkDims[dim] = PetscMax(1, dims[dim]);
    chunksize *= chunkDims[dim];
    ++dim;
  }
#if defined(PETSC_USE_COMPLEX)
  /* trailing dimension of 2 for (real, imaginary) */
  dims[dim]      = 2;
  maxDims[dim]   = dims[dim];
  chunkDims[dim] = PetscMax(1, dims[dim]);
  chunksize *= chunkDims[dim];
  /* hdf5 chunks must be less than 4GB */
  if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
    if (bs > 1 || dim2) {
      if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
      if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
    } else {
      chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 128;
    }
  }
  ++dim;
#else
  /* hdf5 chunks must be less than 4GB */
  if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
    if (bs > 1 || dim2) {
      if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
      if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
    } else {
      chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 64;
    }
  }
#endif

  PetscCallHDF5Return(filespace, H5Screate_simple, ((int)dim, dims, maxDims));

#if defined(PETSC_USE_REAL_SINGLE)
  memscalartype  = H5T_NATIVE_FLOAT;
  filescalartype = H5T_NATIVE_FLOAT;
#elif defined(PETSC_USE_REAL___FLOAT128)
  #error "HDF5 output with 128 bit floats not supported."
#elif defined(PETSC_USE_REAL___FP16)
  #error "HDF5 output with 16 bit floats not supported."
#else
  memscalartype = H5T_NATIVE_DOUBLE;
  /* spoutput downcasts the on-disk representation to single precision */
  if (spoutput == PETSC_TRUE) filescalartype = H5T_NATIVE_FLOAT;
  else filescalartype = H5T_NATIVE_DOUBLE;
#endif

  /* Create the dataset with default properties and close filespace */
  PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));
  PetscCall(PetscStrlen(vecname, &len));
  PetscCheck(len, PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Object must be named");
  if (H5Lexists(group, vecname, H5P_DEFAULT) < 1) {
    /* Create chunk */
    PetscCallHDF5Return(chunkspace, H5Pcreate, (H5P_DATASET_CREATE));
    PetscCallHDF5(H5Pset_chunk, (chunkspace, (int)dim, chunkDims));

    PetscCallHDF5Return(dset_id, H5Dcreate2, (group, vecname, filescalartype, filespace, H5P_DEFAULT, chunkspace, H5P_DEFAULT));
    PetscCallHDF5(H5Pclose, (chunkspace));
  } else {
    /* dataset already exists (earlier timestep): reopen and grow it */
    PetscCallHDF5Return(dset_id, H5Dopen2, (group, vecname, H5P_DEFAULT));
    PetscCallHDF5(H5Dset_extent, (dset_id, dims));
  }
  PetscCallHDF5(H5Sclose, (filespace));

  /* Each process defines a dataset and writes it to the hyperslab in the file */
  dim = 0;
  if (timestep >= 0) {
    count[dim] = 1;
    ++dim;
  }
  PetscCall(PetscHDF5IntCast(xin->map->n / bs, count + dim));
  ++dim;
  if (bs > 1 || dim2) {
    count[dim] = bs;
    ++dim;
  }
#if defined(PETSC_USE_COMPLEX)
  count[dim] = 2;
  ++dim;
#endif
  if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
    PetscCallHDF5Return(memspace, H5Screate_simple, ((int)dim, count, NULL));
  } else {
    /* Can't create dataspace with zero for any dimension, so create null dataspace. */
    PetscCallHDF5Return(memspace, H5Screate, (H5S_NULL));
  }

  /* Select hyperslab in the file */
  PetscCall(VecGetOwnershipRange(xin, &low, NULL));
  dim = 0;
  if (timestep >= 0) {
    offset[dim] = timestep;
    ++dim;
  }
  PetscCall(PetscHDF5IntCast(low / bs, offset + dim));
  ++dim;
  if (bs > 1 || dim2) {
    offset[dim] = 0;
    ++dim;
  }
#if defined(PETSC_USE_COMPLEX)
  offset[dim] = 0;
  ++dim;
#endif
  if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
    PetscCallHDF5Return(filespace, H5Dget_space, (dset_id));
    PetscCallHDF5(H5Sselect_hyperslab, (filespace, H5S_SELECT_SET, offset, NULL, count, NULL));
  } else {
    /* Create null filespace to match null memspace. */
    PetscCallHDF5Return(filespace, H5Screate, (H5S_NULL));
  }

  PetscCall(VecGetArrayRead(xin, &x));
  PetscCallHDF5(H5Dwrite, (dset_id, memscalartype, memspace, filespace, hdf5->dxpl_id, x));
  PetscCallHDF5(H5Fflush, (file_id, H5F_SCOPE_GLOBAL));
  PetscCall(VecRestoreArrayRead(xin, &x));

  /* Close/release resources */
  PetscCallHDF5(H5Gclose, (group));
  PetscCallHDF5(H5Sclose, (filespace));
  PetscCallHDF5(H5Sclose, (memspace));
  PetscCallHDF5(H5Dclose, (dset_id));

#if defined(PETSC_USE_COMPLEX)
  {
    /* tag the dataset so readers know the trailing dimension is (re, im) */
    PetscBool tru = PETSC_TRUE;
    PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "complex", PETSC_BOOL, &tru));
  }
#endif
  if (timestepping) PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "timestepping", PETSC_BOOL, &timestepping));
  PetscCall(PetscInfo(xin, "Wrote Vec object with name %s\n", vecname));
  PetscFunctionReturn(PETSC_SUCCESS);
}
635 #endif
636
/* Top-level viewer dispatch for MPI vectors: determine the viewer's type and
   forward to the matching format-specific implementation. Optional backends
   (Mathematica, HDF5, MATLAB, ADIOS) are compiled in only when PETSc was
   configured with them, hence the preprocessor guards mirrored in the if-chain. */
PetscErrorCode VecView_MPI(Vec xin, PetscViewer viewer)
{
  PetscBool isascii, isbinary, isdraw;
#if defined(PETSC_HAVE_MATHEMATICA)
  PetscBool ismathematica;
#endif
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif
#if defined(PETSC_HAVE_MATLAB)
  PetscBool ismatlab;
#endif
#if defined(PETSC_HAVE_ADIOS)
  PetscBool isadios;
#endif
  PetscBool isglvis;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
#if defined(PETSC_HAVE_MATHEMATICA)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATHEMATICA, &ismathematica));
#endif
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
#endif
#if defined(PETSC_HAVE_MATLAB)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATLAB, &ismatlab));
#endif
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERGLVIS, &isglvis));
#if defined(PETSC_HAVE_ADIOS)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERADIOS, &isadios));
#endif
  if (isascii) {
    PetscCall(VecView_MPI_ASCII(xin, viewer));
  } else if (isbinary) {
    PetscCall(VecView_MPI_Binary(xin, viewer));
  } else if (isdraw) {
    /* draw viewers additionally split on the requested format (line graph vs. plot) */
    PetscViewerFormat format;
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_DRAW_LG) {
      PetscCall(VecView_MPI_Draw_LG(xin, viewer));
    } else {
      PetscCall(VecView_MPI_Draw(xin, viewer));
    }
#if defined(PETSC_HAVE_MATHEMATICA)
  } else if (ismathematica) {
    PetscCall(PetscViewerMathematicaPutVector(viewer, xin));
#endif
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(VecView_MPI_HDF5(xin, viewer));
#endif
#if defined(PETSC_HAVE_ADIOS)
  } else if (isadios) {
    PetscCall(VecView_MPI_ADIOS(xin, viewer));
#endif
#if defined(PETSC_HAVE_MATLAB)
  } else if (ismatlab) {
    PetscCall(VecView_MPI_Matlab(xin, viewer));
#endif
  } else if (isglvis) PetscCall(VecView_GLVis(xin, viewer));
  /* unrecognized viewer types fall through silently */
  PetscFunctionReturn(PETSC_SUCCESS);
}
702
VecGetSize_MPI(Vec xin,PetscInt * N)703 PetscErrorCode VecGetSize_MPI(Vec xin, PetscInt *N)
704 {
705 PetscFunctionBegin;
706 *N = xin->map->N;
707 PetscFunctionReturn(PETSC_SUCCESS);
708 }
709
VecGetValues_MPI(Vec xin,PetscInt ni,const PetscInt ix[],PetscScalar y[])710 PetscErrorCode VecGetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], PetscScalar y[])
711 {
712 const PetscScalar *xx;
713 const PetscInt start = xin->map->range[xin->stash.rank];
714
715 PetscFunctionBegin;
716 PetscCall(VecGetArrayRead(xin, &xx));
717 for (PetscInt i = 0; i < ni; i++) {
718 if (xin->stash.ignorenegidx && ix[i] < 0) continue;
719 const PetscInt tmp = ix[i] - start;
720
721 PetscCheck(tmp >= 0 && tmp < xin->map->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Can only get local values, trying %" PetscInt_FMT, ix[i]);
722 y[i] = xx[tmp];
723 }
724 PetscCall(VecRestoreArrayRead(xin, &xx));
725 PetscFunctionReturn(PETSC_SUCCESS);
726 }
727
/* Insert or add values at global indices. Locally owned entries are written
   directly into the local array; off-process entries are recorded in the stash
   for communication during VecAssemblyBegin/End. INSERT and ADD modes must not
   be mixed within one assembly (checked in debug builds). */
PetscErrorCode VecSetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar y[], InsertMode addv)
{
  const PetscBool  ignorenegidx = xin->stash.ignorenegidx;
  const PetscBool  donotstash   = xin->stash.donotstash;
  const PetscMPIInt rank        = xin->stash.rank;
  const PetscInt  *owners       = xin->map->range;
  const PetscInt   start = owners[rank], end = owners[rank + 1];
  PetscScalar     *xx;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
    PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
  }
  PetscCall(VecGetArray(xin, &xx));
  /* remember the mode so assembly can detect INSERT/ADD mixing */
  xin->stash.insertmode = addv;
  for (PetscInt i = 0; i < ni; ++i) {
    PetscInt    row;
    PetscScalar yv = y ? y[i] : 0; /* NULL y means "set zeros" */

    if (ignorenegidx && ix[i] < 0) continue;
    PetscCheck(ix[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT " cannot be negative", ix[i]);
    if ((row = ix[i]) >= start && row < end) {
      /* locally owned: apply immediately */
      if (addv == INSERT_VALUES) {
        xx[row - start] = yv;
      } else {
        xx[row - start] += yv;
      }
    } else if (!donotstash) {
      /* owned elsewhere: queue for communication during assembly */
      PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
      PetscCall(VecStashValue_Private(&xin->stash, row, yv));
    }
  }
  PetscCall(VecRestoreArray(xin, &xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
764
VecSetValuesBlocked_MPI(Vec xin,PetscInt ni,const PetscInt ix[],const PetscScalar yin[],InsertMode addv)765 PetscErrorCode VecSetValuesBlocked_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar yin[], InsertMode addv)
766 {
767 PetscMPIInt rank = xin->stash.rank;
768 PetscInt *owners = xin->map->range, start = owners[rank];
769 PetscInt end = owners[rank + 1], i, row, bs = xin->map->bs, j;
770 PetscScalar *xx, *y = (PetscScalar *)yin;
771
772 PetscFunctionBegin;
773 PetscCall(VecGetArray(xin, &xx));
774 if (PetscDefined(USE_DEBUG)) {
775 PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
776 PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
777 }
778 xin->stash.insertmode = addv;
779
780 if (addv == INSERT_VALUES) {
781 for (i = 0; i < ni; i++) {
782 if ((row = bs * ix[i]) >= start && row < end) {
783 for (j = 0; j < bs; j++) xx[row - start + j] = y ? y[j] : 0.0;
784 } else if (!xin->stash.donotstash) {
785 if (ix[i] < 0) {
786 if (y) y += bs;
787 continue;
788 }
789 PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
790 PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
791 }
792 if (y) y += bs;
793 }
794 } else {
795 for (i = 0; i < ni; i++) {
796 if ((row = bs * ix[i]) >= start && row < end) {
797 for (j = 0; j < bs; j++) xx[row - start + j] += y ? y[j] : 0.0;
798 } else if (!xin->stash.donotstash) {
799 if (ix[i] < 0) {
800 if (y) y += bs;
801 continue;
802 }
803 PetscCheck(ix[i] <= xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
804 PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
805 }
806 if (y) y += bs;
807 }
808 }
809 PetscCall(VecRestoreArray(xin, &xx));
810 PetscFunctionReturn(PETSC_SUCCESS);
811 }
812
813 /*
814 Since nsends or nreceives may be zero we add 1 in certain mallocs
815 to make sure we never malloc an empty one.
816 */
/*
  VecAssemblyBegin_MPI - Begins assembling the vector: starts sending stashed
  off-process entries (both the scalar stash and the block stash) to their
  owning ranks.

  Collective: every rank must participate in the MPIU_Allreduce() that agrees
  on a global InsertMode, unless the vector has donotstash set
  (VEC_IGNORE_OFF_PROC_ENTRIES), in which case this returns immediately.
*/
PetscErrorCode VecAssemblyBegin_MPI(Vec xin)
{
  PetscInt *owners = xin->map->range, *bowners, i, bs, nstash, reallocs;
  PetscMPIInt size;
  InsertMode addv;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)xin, &comm));
  if (xin->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);

  /* Combine per-rank insert modes with bitwise OR; if some ranks inserted while
     others added, the result is (ADD_VALUES | INSERT_VALUES), caught below */
  PetscCallMPI(MPIU_Allreduce((PetscEnum *)&xin->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
  PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
  xin->stash.insertmode = addv; /* in case this processor had no cache */
  xin->bstash.insertmode = addv; /* Block stash implicitly tracks InsertMode of scalar stash */

  PetscCall(VecGetBlockSize(xin, &bs));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
  /* Lazily build and cache the block-ownership ranges (scalar ranges divided by
     bs) that the block stash scatters against */
  if (!xin->bstash.bowners && xin->map->bs != -1) {
    PetscCall(PetscMalloc1(size + 1, &bowners));
    for (i = 0; i < size + 1; i++) bowners[i] = owners[i] / bs;
    xin->bstash.bowners = bowners;
  } else bowners = xin->bstash.bowners;

  /* Start both scatters; scalar stash first (the matching End in
     VecAssemblyEnd_MPI processes them in the same order) */
  PetscCall(VecStashScatterBegin_Private(&xin->stash, owners));
  PetscCall(VecStashScatterBegin_Private(&xin->bstash, bowners));
  PetscCall(VecStashGetInfo_Private(&xin->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(xin, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(VecStashGetInfo_Private(&xin->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(xin, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
849
/*
  VecAssemblyEnd_MPI - Completes the assembly begun by VecAssemblyBegin_MPI().

  Drains the incoming messages of the scalar stash and then the block stash,
  applying each received (row, value) pair to the locally owned part of the
  vector using the InsertMode agreed upon during VecAssemblyBegin_MPI().
  Finally resets the insert mode so a new insert/add cycle may begin.
*/
PetscErrorCode VecAssemblyEnd_MPI(Vec vec)
{
  PetscInt base, i, j, *row, flg, bs;
  PetscMPIInt n;
  PetscScalar *val, *vv, *array, *xarray;

  PetscFunctionBegin;
  if (!vec->stash.donotstash) {
    PetscCall(VecGetArray(vec, &xarray));
    PetscCall(VecGetBlockSize(vec, &bs));
    base = vec->map->range[vec->stash.rank]; /* global index of this rank's first entry */

    /* Process the stash: receive one message at a time until none remain */
    while (1) {
      PetscCall(VecStashScatterGetMesg_Private(&vec->stash, &n, &row, &val, &flg));
      if (!flg) break; /* no more messages */
      if (vec->stash.insertmode == ADD_VALUES) {
        for (i = 0; i < n; i++) xarray[row[i] - base] += val[i];
      } else if (vec->stash.insertmode == INSERT_VALUES) {
        for (i = 0; i < n; i++) xarray[row[i] - base] = val[i];
      } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
    }
    PetscCall(VecStashScatterEnd_Private(&vec->stash));

    /* now process the block-stash: row[i] here is a block index, so the scalar
       offset of the block within the local array is row[i]*bs - base */
    while (1) {
      PetscCall(VecStashScatterGetMesg_Private(&vec->bstash, &n, &row, &val, &flg));
      if (!flg) break;
      for (i = 0; i < n; i++) {
        array = xarray + row[i] * bs - base; /* start of destination block */
        vv = val + i * bs;                   /* start of source block in the message */
        if (vec->stash.insertmode == ADD_VALUES) {
          for (j = 0; j < bs; j++) array[j] += vv[j];
        } else if (vec->stash.insertmode == INSERT_VALUES) {
          for (j = 0; j < bs; j++) array[j] = vv[j];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
      }
    }
    PetscCall(VecStashScatterEnd_Private(&vec->bstash));
    PetscCall(VecRestoreArray(vec, &xarray));
  }
  vec->stash.insertmode = NOT_SET_VALUES; /* ready for a fresh insert/add cycle */
  PetscFunctionReturn(PETSC_SUCCESS);
}
894
VecSetPreallocationCOO_MPI(Vec x,PetscCount coo_n,const PetscInt coo_i[])895 PetscErrorCode VecSetPreallocationCOO_MPI(Vec x, PetscCount coo_n, const PetscInt coo_i[])
896 {
897 PetscInt m, M, rstart, rend;
898 Vec_MPI *vmpi = (Vec_MPI *)x->data;
899 PetscCount k, p, q, rem; /* Loop variables over coo arrays */
900 PetscMPIInt size;
901 MPI_Comm comm;
902
903 PetscFunctionBegin;
904 PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
905 PetscCallMPI(MPI_Comm_size(comm, &size));
906 PetscCall(VecResetPreallocationCOO_MPI(x));
907
908 PetscCall(PetscLayoutSetUp(x->map));
909 PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
910 PetscCall(VecGetLocalSize(x, &m));
911 PetscCall(VecGetSize(x, &M));
912
913 /* Sort COOs along with a permutation array, so that negative indices come */
914 /* first, then local ones, then remote ones. */
915 PetscCount n1 = coo_n, nneg, *perm;
916 PetscInt *i1; /* Copy of input COOs along with a permutation array */
917 PetscCall(PetscMalloc1(n1, &i1));
918 PetscCall(PetscMalloc1(n1, &perm));
919 PetscCall(PetscArraycpy(i1, coo_i, n1)); /* Make a copy since we'll modify it */
920 for (k = 0; k < n1; k++) perm[k] = k;
921
922 /* Manipulate i1[] so that entries with negative indices will have the smallest
923 index, local entries will have greater but negative indices, and remote entries
924 will have positive indices.
925 */
926 for (k = 0; k < n1; k++) {
927 if (i1[k] < 0) {
928 if (x->stash.ignorenegidx) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */
929 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found a negative index in VecSetPreallocateCOO() but VEC_IGNORE_NEGATIVE_INDICES was not set");
930 } else if (i1[k] >= rstart && i1[k] < rend) {
931 i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
932 } else {
933 PetscCheck(i1[k] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found index %" PetscInt_FMT " in VecSetPreallocateCOO() larger than the global size %" PetscInt_FMT, i1[k], M);
934 if (x->stash.donotstash) i1[k] = PETSC_INT_MIN; /* Ignore off-proc indices as if they were negative */
935 }
936 }
937
938 /* Sort the indices, after that, [0,nneg) have ignored entries, [nneg,rem) have local entries and [rem,n1) have remote entries */
939 PetscCall(PetscSortIntWithCountArray(n1, i1, perm));
940 for (k = 0; k < n1; k++) {
941 if (i1[k] > PETSC_INT_MIN) break;
942 } /* Advance k to the first entry we need to take care of */
943 nneg = k;
944 PetscCall(PetscSortedIntUpperBound(i1, nneg, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
945 for (k = nneg; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert indices of local entries */
946
947 /* Build stuff for local entries */
948 PetscCount tot1, *jmap1, *perm1;
949 PetscCall(PetscCalloc1(m + 1, &jmap1));
950 for (k = nneg; k < rem; k++) jmap1[i1[k] - rstart + 1]++; /* Count repeats of each local entry */
951 for (k = 0; k < m; k++) jmap1[k + 1] += jmap1[k]; /* Transform jmap1[] to CSR-like data structure */
952 tot1 = jmap1[m];
953 PetscAssert(tot1 == rem - nneg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected errors in VecSetPreallocationCOO_MPI");
954 PetscCall(PetscMalloc1(tot1, &perm1));
955 PetscCall(PetscArraycpy(perm1, perm + nneg, tot1));
956
957 /* Record the permutation array for filling the send buffer */
958 PetscCount *Cperm;
959 PetscCall(PetscMalloc1(n1 - rem, &Cperm));
960 PetscCall(PetscArraycpy(Cperm, perm + rem, n1 - rem));
961 PetscCall(PetscFree(perm));
962
963 /* Send remote entries to their owner */
964 /* Find which entries should be sent to which remote ranks*/
965 PetscInt nsend = 0; /* Number of MPI ranks to send data to */
966 PetscMPIInt *sendto; /* [nsend], storing remote ranks */
967 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
968 const PetscInt *ranges;
969 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
970
971 PetscCall(PetscLayoutGetRanges(x->map, &ranges));
972 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
973 for (k = rem; k < n1;) {
974 PetscMPIInt owner;
975 PetscInt firstRow, lastRow;
976
977 /* Locate a row range */
978 firstRow = i1[k]; /* first row of this owner */
979 PetscCall(PetscLayoutFindOwner(x->map, firstRow, &owner));
980 lastRow = ranges[owner + 1] - 1; /* last row of this owner */
981
982 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
983 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
984
985 /* All entries in [k,p) belong to this remote owner */
986 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
987 PetscMPIInt *sendto2;
988 PetscInt *nentries2;
989 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
990
991 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
992 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
993 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
994 PetscCall(PetscFree2(sendto, nentries2));
995 sendto = sendto2;
996 nentries = nentries2;
997 maxNsend = maxNsend2;
998 }
999 sendto[nsend] = owner;
1000 PetscCall(PetscIntCast(p - k, &nentries[nsend]));
1001 nsend++;
1002 k = p;
1003 }
1004
1005 /* Build 1st SF to know offsets on remote to send data */
1006 PetscSF sf1;
1007 PetscInt nroots = 1, nroots2 = 0;
1008 PetscInt nleaves = nsend, nleaves2 = 0;
1009 PetscInt *offsets;
1010 PetscSFNode *iremote;
1011
1012 PetscCall(PetscSFCreate(comm, &sf1));
1013 PetscCall(PetscMalloc1(nsend, &iremote));
1014 PetscCall(PetscMalloc1(nsend, &offsets));
1015 for (k = 0; k < nsend; k++) {
1016 iremote[k].rank = sendto[k];
1017 iremote[k].index = 0;
1018 nleaves2 += nentries[k];
1019 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
1020 }
1021 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
1022 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
1023 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
1024 PetscCall(PetscSFDestroy(&sf1));
1025 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
1026
1027 /* Build 2nd SF to send remote COOs to their owner */
1028 PetscSF sf2;
1029 nroots = nroots2;
1030 nleaves = nleaves2;
1031 PetscCall(PetscSFCreate(comm, &sf2));
1032 PetscCall(PetscSFSetFromOptions(sf2));
1033 PetscCall(PetscMalloc1(nleaves, &iremote));
1034 p = 0;
1035 for (k = 0; k < nsend; k++) {
1036 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
1037 for (q = 0; q < nentries[k]; q++, p++) {
1038 iremote[p].rank = sendto[k];
1039 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
1040 }
1041 }
1042 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
1043
1044 /* Send the remote COOs to their owner */
1045 PetscInt n2 = nroots, *i2; /* Buffers for received COOs from other ranks, along with a permutation array */
1046 PetscCount *perm2;
1047 PetscCall(PetscMalloc1(n2, &i2));
1048 PetscCall(PetscMalloc1(n2, &perm2));
1049 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
1050 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
1051
1052 PetscCall(PetscFree(i1));
1053 PetscCall(PetscFree(offsets));
1054 PetscCall(PetscFree2(sendto, nentries));
1055
1056 /* Sort received COOs along with a permutation array */
1057 PetscCount *imap2;
1058 PetscCount *jmap2, nnz2;
1059 PetscScalar *sendbuf, *recvbuf;
1060 PetscInt old;
1061 PetscCount sendlen = n1 - rem, recvlen = n2;
1062
1063 for (k = 0; k < n2; k++) perm2[k] = k;
1064 PetscCall(PetscSortIntWithCountArray(n2, i2, perm2));
1065
1066 /* nnz2 will be # of unique entries in the recvbuf */
1067 nnz2 = n2;
1068 for (k = 1; k < n2; k++) {
1069 if (i2[k] == i2[k - 1]) nnz2--;
1070 }
1071
1072 /* Build imap2[] and jmap2[] for each unique entry */
1073 PetscCall(PetscMalloc4(nnz2, &imap2, nnz2 + 1, &jmap2, sendlen, &sendbuf, recvlen, &recvbuf));
1074 p = -1;
1075 old = -1;
1076 jmap2[0] = 0;
1077 jmap2++;
1078 for (k = 0; k < n2; k++) {
1079 if (i2[k] != old) { /* Meet a new entry */
1080 p++;
1081 imap2[p] = i2[k] - rstart;
1082 jmap2[p] = 1;
1083 old = i2[k];
1084 } else {
1085 jmap2[p]++;
1086 }
1087 }
1088 jmap2--;
1089 for (k = 0; k < nnz2; k++) jmap2[k + 1] += jmap2[k];
1090
1091 PetscCall(PetscFree(i2));
1092
1093 vmpi->coo_n = coo_n;
1094 vmpi->tot1 = tot1;
1095 vmpi->jmap1 = jmap1;
1096 vmpi->perm1 = perm1;
1097 vmpi->nnz2 = nnz2;
1098 vmpi->imap2 = imap2;
1099 vmpi->jmap2 = jmap2;
1100 vmpi->perm2 = perm2;
1101
1102 vmpi->Cperm = Cperm;
1103 vmpi->sendbuf = sendbuf;
1104 vmpi->recvbuf = recvbuf;
1105 vmpi->sendlen = sendlen;
1106 vmpi->recvlen = recvlen;
1107 vmpi->coo_sf = sf2;
1108 PetscFunctionReturn(PETSC_SUCCESS);
1109 }
1110
VecSetValuesCOO_MPI(Vec x,const PetscScalar v[],InsertMode imode)1111 PetscErrorCode VecSetValuesCOO_MPI(Vec x, const PetscScalar v[], InsertMode imode)
1112 {
1113 Vec_MPI *vmpi = (Vec_MPI *)x->data;
1114 PetscInt m;
1115 PetscScalar *a, *sendbuf = vmpi->sendbuf, *recvbuf = vmpi->recvbuf;
1116 const PetscCount *jmap1 = vmpi->jmap1;
1117 const PetscCount *perm1 = vmpi->perm1;
1118 const PetscCount *imap2 = vmpi->imap2;
1119 const PetscCount *jmap2 = vmpi->jmap2;
1120 const PetscCount *perm2 = vmpi->perm2;
1121 const PetscCount *Cperm = vmpi->Cperm;
1122 const PetscCount nnz2 = vmpi->nnz2;
1123
1124 PetscFunctionBegin;
1125 PetscCall(VecGetLocalSize(x, &m));
1126 PetscCall(VecGetArray(x, &a));
1127
1128 /* Pack entries to be sent to remote */
1129 for (PetscInt i = 0; i < vmpi->sendlen; i++) sendbuf[i] = v[Cperm[i]];
1130
1131 /* Send remote entries to their owner and overlap the communication with local computation */
1132 PetscCall(PetscSFReduceWithMemTypeBegin(vmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
1133 /* Add local entries to A and B */
1134 for (PetscInt i = 0; i < m; i++) { /* All entries in a[] are either zero'ed or added with a value (i.e., initialized) */
1135 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
1136 for (PetscCount k = jmap1[i]; k < jmap1[i + 1]; k++) sum += v[perm1[k]];
1137 a[i] = (imode == INSERT_VALUES ? 0.0 : a[i]) + sum;
1138 }
1139 PetscCall(PetscSFReduceEnd(vmpi->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
1140
1141 /* Add received remote entries to A and B */
1142 for (PetscInt i = 0; i < nnz2; i++) {
1143 for (PetscCount k = jmap2[i]; k < jmap2[i + 1]; k++) a[imap2[i]] += recvbuf[perm2[k]];
1144 }
1145
1146 PetscCall(VecRestoreArray(x, &a));
1147 PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149