1 /*
2      Code for some of the parallel vector primitives.
3 */
4 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
5 #include <petsc/private/viewerhdf5impl.h>
6 #include <petsc/private/glvisviewerimpl.h>
7 #include <petsc/private/glvisvecimpl.h>
8 #include <petscsf.h>
9 
10 static PetscErrorCode VecResetPreallocationCOO_MPI(Vec v)
11 {
12   Vec_MPI *vmpi = (Vec_MPI *)v->data;
13 
14   PetscFunctionBegin;
15   if (vmpi) {
16     PetscCall(PetscFree(vmpi->jmap1));
17     PetscCall(PetscFree(vmpi->perm1));
18     PetscCall(PetscFree(vmpi->Cperm));
19     PetscCall(PetscFree4(vmpi->imap2, vmpi->jmap2, vmpi->sendbuf, vmpi->recvbuf));
20     PetscCall(PetscFree(vmpi->perm2));
21     PetscCall(PetscSFDestroy(&vmpi->coo_sf));
22   }
23   PetscFunctionReturn(PETSC_SUCCESS);
24 }
25 
26 PetscErrorCode VecDestroy_MPI(Vec v)
27 {
28   Vec_MPI *x = (Vec_MPI *)v->data;
29 
30   PetscFunctionBegin;
31   PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->N));
32   if (!x) PetscFunctionReturn(PETSC_SUCCESS);
33   PetscCall(PetscFree(x->array_allocated));
34 
35   /* Destroy local representation of vector if it exists */
36   if (x->localrep) {
37     PetscCall(VecDestroy(&x->localrep));
38     PetscCall(VecScatterDestroy(&x->localupdate));
39     PetscCall(ISDestroy(&x->ghost));
40   }
41   PetscCall(VecAssemblyReset_MPI(v));
42 
43   /* Destroy the stashes: note the order - so that the tags are freed properly */
44   PetscCall(VecStashDestroy_Private(&v->bstash));
45   PetscCall(VecStashDestroy_Private(&v->stash));
46 
47   PetscCall(VecResetPreallocationCOO_MPI(v));
48   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", NULL));
50   PetscCall(PetscFree(v->data));
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 static PetscErrorCode VecView_MPI_ASCII(Vec xin, PetscViewer viewer)
55 {
56   PetscInt           i, work = xin->map->n, cnt, nLen;
57   PetscMPIInt        j, n = 0, size, rank, tag = ((PetscObject)viewer)->tag, len;
58   MPI_Status         status;
59   PetscScalar       *values;
60   const PetscScalar *xarray;
61   const char        *name;
62   PetscViewerFormat  format;
63 
64   PetscFunctionBegin;
65   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
66   PetscCall(PetscViewerGetFormat(viewer, &format));
67   if (format == PETSC_VIEWER_LOAD_BALANCE) {
68     PetscInt nmax = 0, nmin = xin->map->n, navg;
69     for (PetscMPIInt i = 0; i < size; i++) {
70       nmax = PetscMax(nmax, xin->map->range[i + 1] - xin->map->range[i]);
71       nmin = PetscMin(nmin, xin->map->range[i + 1] - xin->map->range[i]);
72     }
73     navg = xin->map->N / size;
74     PetscCall(PetscViewerASCIIPrintf(viewer, "  Load Balance - Local vector size Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
75     PetscFunctionReturn(PETSC_SUCCESS);
76   }
77 
78   PetscCall(VecGetArrayRead(xin, &xarray));
79   /* determine maximum message to arrive */
80   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
81   PetscCallMPI(MPI_Reduce(rank ? &work : MPI_IN_PLACE, &work, 1, MPIU_INT, MPI_MAX, 0, PetscObjectComm((PetscObject)xin)));
82   PetscCall(PetscMPIIntCast(work, &len));
83   if (format == PETSC_VIEWER_ASCII_GLVIS) rank = 0, len = 0; /* no parallel distributed write support from GLVis */
84   if (rank == 0) {
85     PetscCall(PetscMalloc1(len, &values));
86     /*
87         MATLAB format and ASCII format are very similar except
88         MATLAB uses %18.16e format while ASCII uses %g
89     */
90     if (format == PETSC_VIEWER_ASCII_MATLAB) {
91       PetscCall(PetscObjectGetName((PetscObject)xin, &name));
92       PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name));
93       for (i = 0; i < xin->map->n; i++) {
94 #if defined(PETSC_USE_COMPLEX)
95         if (PetscImaginaryPart(xarray[i]) > 0.0) {
96           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
97         } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
98           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
99         } else {
100           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(xarray[i])));
101         }
102 #else
103         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
104 #endif
105       }
106       /* receive and print messages */
107       for (j = 1; j < size; j++) {
108         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
109         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
110         for (i = 0; i < n; i++) {
111 #if defined(PETSC_USE_COMPLEX)
112           if (PetscImaginaryPart(values[i]) > 0.0) {
113             PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
114           } else if (PetscImaginaryPart(values[i]) < 0.0) {
115             PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
116           } else {
117             PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(values[i])));
118           }
119 #else
120           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
121 #endif
122         }
123       }
124       PetscCall(PetscViewerASCIIPrintf(viewer, "];\n"));
125 
126     } else if (format == PETSC_VIEWER_ASCII_SYMMODU) {
127       for (i = 0; i < xin->map->n; i++) {
128 #if defined(PETSC_USE_COMPLEX)
129         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
130 #else
131         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
132 #endif
133       }
134       /* receive and print messages */
135       for (j = 1; j < size; j++) {
136         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
137         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
138         for (i = 0; i < n; i++) {
139 #if defined(PETSC_USE_COMPLEX)
140           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
141 #else
142           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
143 #endif
144         }
145       }
146     } else if (format == PETSC_VIEWER_ASCII_PCICE) {
147       PetscInt bs, b, vertexCount = 1;
148 
149       PetscCall(VecGetLocalSize(xin, &nLen));
150       PetscCall(PetscMPIIntCast(nLen, &n));
151       PetscCall(VecGetBlockSize(xin, &bs));
152       PetscCheck(bs >= 1 && bs <= 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PCICE can only handle up to 3D objects, but vector dimension is %" PetscInt_FMT, bs);
153 
154       PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", xin->map->N / bs));
155       for (i = 0; i < n / bs; i++) {
156         PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT "   ", vertexCount++));
157         for (b = 0; b < bs; b++) {
158           if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
159 #if !defined(PETSC_USE_COMPLEX)
160           PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)xarray[i * bs + b]));
161 #endif
162         }
163         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
164       }
165       for (j = 1; j < size; j++) {
166         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
167         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
168         for (i = 0; i < n / bs; i++) {
169           PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT "   ", vertexCount++));
170           for (b = 0; b < bs; b++) {
171             if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
172 #if !defined(PETSC_USE_COMPLEX)
173             PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)values[i * bs + b]));
174 #endif
175           }
176           PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
177         }
178       }
179     } else if (format == PETSC_VIEWER_ASCII_GLVIS) {
180       /* GLVis ASCII visualization/dump: this function mimics mfem::GridFunction::Save() */
181       const PetscScalar      *array;
182       PetscInt                i, n, vdim, ordering = 1; /* mfem::FiniteElementSpace::Ordering::byVDIM */
183       PetscContainer          glvis_container;
184       PetscViewerGLVisVecInfo glvis_vec_info;
185       PetscViewerGLVisInfo    glvis_info;
186 
187       /* mfem::FiniteElementSpace::Save() */
188       PetscCall(VecGetBlockSize(xin, &vdim));
189       PetscCall(PetscViewerASCIIPrintf(viewer, "FiniteElementSpace\n"));
190       PetscCall(PetscObjectQuery((PetscObject)xin, "_glvis_info_container", (PetscObject *)&glvis_container));
191       PetscCheck(glvis_container, PetscObjectComm((PetscObject)xin), PETSC_ERR_PLIB, "Missing GLVis container");
192       PetscCall(PetscContainerGetPointer(glvis_container, &glvis_vec_info));
193       PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", glvis_vec_info->fec_type));
194       PetscCall(PetscViewerASCIIPrintf(viewer, "VDim: %" PetscInt_FMT "\n", vdim));
195       PetscCall(PetscViewerASCIIPrintf(viewer, "Ordering: %" PetscInt_FMT "\n", ordering));
196       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
197       /* mfem::Vector::Print() */
198       PetscCall(PetscObjectQuery((PetscObject)viewer, "_glvis_info_container", (PetscObject *)&glvis_container));
199       PetscCheck(glvis_container, PetscObjectComm((PetscObject)viewer), PETSC_ERR_PLIB, "Missing GLVis container");
200       PetscCall(PetscContainerGetPointer(glvis_container, &glvis_info));
201       if (glvis_info->enabled) {
202         PetscCall(VecGetLocalSize(xin, &n));
203         PetscCall(VecGetArrayRead(xin, &array));
204         for (i = 0; i < n; i++) {
205           PetscCall(PetscViewerASCIIPrintf(viewer, glvis_info->fmt, (double)PetscRealPart(array[i])));
206           PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
207         }
208         PetscCall(VecRestoreArrayRead(xin, &array));
209       }
210     } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
211       /* No info */
212     } else {
213       if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
214       cnt = 0;
215       for (i = 0; i < xin->map->n; i++) {
216         if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
217 #if defined(PETSC_USE_COMPLEX)
218         if (PetscImaginaryPart(xarray[i]) > 0.0) {
219           PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
220         } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
221           PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
222         } else {
223           PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(xarray[i])));
224         }
225 #else
226         PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)xarray[i]));
227 #endif
228       }
229       /* receive and print messages */
230       for (j = 1; j < size; j++) {
231         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
232         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
233         if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", j));
234         for (i = 0; i < n; i++) {
235           if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
236 #if defined(PETSC_USE_COMPLEX)
237           if (PetscImaginaryPart(values[i]) > 0.0) {
238             PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
239           } else if (PetscImaginaryPart(values[i]) < 0.0) {
240             PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
241           } else {
242             PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(values[i])));
243           }
244 #else
245           PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)values[i]));
246 #endif
247         }
248       }
249     }
250     PetscCall(PetscFree(values));
251   } else {
252     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
253       /* Rank 0 is not trying to receive anything, so don't send anything */
254     } else {
255       if (format == PETSC_VIEWER_ASCII_MATLAB) {
256         /* this may be a collective operation so make sure everyone calls it */
257         PetscCall(PetscObjectGetName((PetscObject)xin, &name));
258       }
259       PetscCallMPI(MPIU_Send((void *)xarray, xin->map->n, MPIU_SCALAR, 0, tag, PetscObjectComm((PetscObject)xin)));
260     }
261   }
262   PetscCall(PetscViewerFlush(viewer));
263   PetscCall(VecRestoreArrayRead(xin, &xarray));
264   PetscFunctionReturn(PETSC_SUCCESS);
265 }
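/*
  Illustrative caller-side sketch (not part of this file's implementation): the routine above
  gathers the vector to rank 0 and prints it in whatever format is pushed on the viewer, e.g.
  MATLAB-style output on stdout. These are standard PETSc calls; error checking is elided.

     PetscCall(PetscObjectSetName((PetscObject)x, "x"));
     PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_MATLAB));
     PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
     PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/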
266 
267 PetscErrorCode VecView_MPI_Binary(Vec xin, PetscViewer viewer)
268 {
269   return VecView_Binary(xin, viewer);
270 }
271 
272 #include <petscdraw.h>
273 PetscErrorCode VecView_MPI_Draw_LG(Vec xin, PetscViewer viewer)
274 {
275   PetscDraw          draw;
276   PetscBool          isnull;
277   PetscDrawLG        lg;
278   PetscMPIInt        i, size, rank, n, N, *lens = NULL, *disp = NULL;
279   PetscReal         *values, *xx = NULL, *yy = NULL;
280   const PetscScalar *xarray;
281   int                colors[] = {PETSC_DRAW_RED};
282 
283   PetscFunctionBegin;
284   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
285   PetscCall(PetscDrawIsNull(draw, &isnull));
286   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
287   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
288   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
289   PetscCall(PetscMPIIntCast(xin->map->n, &n));
290   PetscCall(PetscMPIIntCast(xin->map->N, &N));
291 
292   PetscCall(VecGetArrayRead(xin, &xarray));
293 #if defined(PETSC_USE_COMPLEX)
294   PetscCall(PetscMalloc1(n + 1, &values));
295   for (i = 0; i < n; i++) values[i] = PetscRealPart(xarray[i]);
296 #else
297   values = (PetscReal *)xarray;
298 #endif
299   if (rank == 0) {
300     PetscCall(PetscMalloc2(N, &xx, N, &yy));
301     for (i = 0; i < N; i++) xx[i] = (PetscReal)i;
302     PetscCall(PetscMalloc2(size, &lens, size, &disp));
303     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i + 1] - xin->map->range[i], &lens[i]));
304     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i], &disp[i]));
305   }
306   PetscCallMPI(MPI_Gatherv(values, n, MPIU_REAL, yy, lens, disp, MPIU_REAL, 0, PetscObjectComm((PetscObject)xin)));
307   PetscCall(PetscFree2(lens, disp));
308 #if defined(PETSC_USE_COMPLEX)
309   PetscCall(PetscFree(values));
310 #endif
311   PetscCall(VecRestoreArrayRead(xin, &xarray));
312 
313   PetscCall(PetscViewerDrawGetDrawLG(viewer, 0, &lg));
314   PetscCall(PetscDrawLGReset(lg));
315   PetscCall(PetscDrawLGSetDimension(lg, 1));
316   PetscCall(PetscDrawLGSetColors(lg, colors));
317   if (rank == 0) {
318     PetscCall(PetscDrawLGAddPoints(lg, N, &xx, &yy));
319     PetscCall(PetscFree2(xx, yy));
320   }
321   PetscCall(PetscDrawLGDraw(lg));
322   PetscCall(PetscDrawLGSave(lg));
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec xin, PetscViewer viewer)
327 {
328   PetscMPIInt        rank, size, tag = ((PetscObject)viewer)->tag;
329   PetscInt           i, start, end;
330   MPI_Status         status;
331   PetscReal          min, max, tmp = 0.0;
332   PetscDraw          draw;
333   PetscBool          isnull;
334   PetscDrawAxis      axis;
335   const PetscScalar *xarray;
336 
337   PetscFunctionBegin;
338   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
339   PetscCall(PetscDrawIsNull(draw, &isnull));
340   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
341   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
342   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
343 
344   PetscCall(VecMin(xin, NULL, &min));
345   PetscCall(VecMax(xin, NULL, &max));
346   if (min == max) {
347     min -= 1.e-5;
348     max += 1.e-5;
349   }
350 
351   PetscCall(PetscDrawCheckResizedWindow(draw));
352   PetscCall(PetscDrawClear(draw));
353 
354   PetscCall(PetscDrawAxisCreate(draw, &axis));
355   PetscCall(PetscDrawAxisSetLimits(axis, 0.0, (PetscReal)xin->map->N, min, max));
356   PetscCall(PetscDrawAxisDraw(axis));
357   PetscCall(PetscDrawAxisDestroy(&axis));
358 
359   /* draw local part of vector */
360   PetscCall(VecGetArrayRead(xin, &xarray));
361   PetscCall(VecGetOwnershipRange(xin, &start, &end));
362   if (rank < size - 1) { /* send value to right */
363     PetscCallMPI(MPI_Send((void *)&xarray[xin->map->n - 1], 1, MPIU_REAL, rank + 1, tag, PetscObjectComm((PetscObject)xin)));
364   }
365   if (rank) { /* receive value from left */
366     PetscCallMPI(MPI_Recv(&tmp, 1, MPIU_REAL, rank - 1, tag, PetscObjectComm((PetscObject)xin), &status));
367   }
368   PetscDrawCollectiveBegin(draw);
369   if (rank) PetscCall(PetscDrawLine(draw, (PetscReal)start - 1, tmp, (PetscReal)start, PetscRealPart(xarray[0]), PETSC_DRAW_RED));
370   for (i = 1; i < xin->map->n; i++) PetscCall(PetscDrawLine(draw, (PetscReal)(i - 1 + start), PetscRealPart(xarray[i - 1]), (PetscReal)(i + start), PetscRealPart(xarray[i]), PETSC_DRAW_RED));
371   PetscDrawCollectiveEnd(draw);
372   PetscCall(VecRestoreArrayRead(xin, &xarray));
373 
374   PetscCall(PetscDrawFlush(draw));
375   PetscCall(PetscDrawPause(draw));
376   PetscCall(PetscDrawSave(draw));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 #if defined(PETSC_HAVE_MATLAB)
381 PetscErrorCode VecView_MPI_Matlab(Vec xin, PetscViewer viewer)
382 {
383   PetscMPIInt        rank, size, *lens;
384   PetscInt           i, N = xin->map->N;
385   const PetscScalar *xarray;
386   PetscScalar       *xx;
387 
388   PetscFunctionBegin;
389   PetscCall(VecGetArrayRead(xin, &xarray));
390   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
391   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
392   if (rank == 0) {
393     PetscCall(PetscMalloc1(N, &xx));
394     PetscCall(PetscMalloc1(size, &lens));
395     for (i = 0; i < size; i++) lens[i] = xin->map->range[i + 1] - xin->map->range[i];
396 
397     PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, xx, lens, xin->map->range, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
398     PetscCall(PetscFree(lens));
399 
400     PetscCall(PetscObjectName((PetscObject)xin));
401     PetscCall(PetscViewerMatlabPutArray(viewer, N, 1, xx, ((PetscObject)xin)->name));
402 
403     PetscCall(PetscFree(xx));
404   } else {
405     PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, 0, 0, 0, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
406   }
407   PetscCall(VecRestoreArrayRead(xin, &xarray));
408   PetscFunctionReturn(PETSC_SUCCESS);
409 }
410 #endif
411 
412 #if defined(PETSC_HAVE_ADIOS)
413   #include <adios.h>
414   #include <adios_read.h>
415   #include <petsc/private/vieweradiosimpl.h>
416   #include <petsc/private/viewerimpl.h>
417 
418 PetscErrorCode VecView_MPI_ADIOS(Vec xin, PetscViewer viewer)
419 {
420   PetscViewer_ADIOS *adios = (PetscViewer_ADIOS *)viewer->data;
421   const char        *vecname;
422   int64_t            id;
423   PetscInt           n, N, rstart;
424   const PetscScalar *array;
425   char               nglobalname[16], nlocalname[16], coffset[16];
426 
427   PetscFunctionBegin;
428   PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));
429 
430   PetscCall(VecGetLocalSize(xin, &n));
431   PetscCall(VecGetSize(xin, &N));
432   PetscCall(VecGetOwnershipRange(xin, &rstart, NULL));
433 
434   PetscCall(PetscSNPrintf(nlocalname, PETSC_STATIC_ARRAY_LENGTH(nlocalname), "%" PetscInt_FMT, n));
435   PetscCall(PetscSNPrintf(nglobalname, PETSC_STATIC_ARRAY_LENGTH(nglobalname), "%" PetscInt_FMT, N));
436   PetscCall(PetscSNPrintf(coffset, PETSC_STATIC_ARRAY_LENGTH(coffset), "%" PetscInt_FMT, rstart));
437   id = adios_define_var(Petsc_adios_group, vecname, "", adios_double, nlocalname, nglobalname, coffset);
438   PetscCall(VecGetArrayRead(xin, &array));
439   PetscCallExternal(adios_write_byid, adios->adios_handle, id, array);
440   PetscCall(VecRestoreArrayRead(xin, &array));
441   PetscFunctionReturn(PETSC_SUCCESS);
442 }
443 #endif
444 
445 #if defined(PETSC_HAVE_HDF5)
446 PetscErrorCode VecView_MPI_HDF5(Vec xin, PetscViewer viewer)
447 {
448   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
449   /* TODO: It looks like we can remove the H5Sclose(filespace) and H5Dget_space(dset_id). Why do we do this? */
450   hid_t              filespace;  /* file dataspace identifier */
451   hid_t              chunkspace; /* chunk dataset property identifier */
452   hid_t              dset_id;    /* dataset identifier */
453   hid_t              memspace;   /* memory dataspace identifier */
454   hid_t              file_id;
455   hid_t              group;
456   hid_t              memscalartype;  /* scalar type for mem (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
457   hid_t              filescalartype; /* scalar type for file (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
458   PetscInt           bs = xin->map->bs;
459   hsize_t            dim;
460   hsize_t            maxDims[4], dims[4], chunkDims[4], count[4], offset[4];
461   PetscBool          timestepping, dim2, spoutput;
462   PetscInt           timestep = PETSC_INT_MIN, low;
463   hsize_t            chunksize;
464   const PetscScalar *x;
465   const char        *vecname;
466   size_t             len;
467 
468   PetscFunctionBegin;
469   PetscCall(PetscViewerHDF5OpenGroup(viewer, NULL, &file_id, &group));
470   PetscCall(PetscViewerHDF5IsTimestepping(viewer, &timestepping));
471   if (timestepping) PetscCall(PetscViewerHDF5GetTimestep(viewer, &timestep));
472   PetscCall(PetscViewerHDF5GetBaseDimension2(viewer, &dim2));
473   PetscCall(PetscViewerHDF5GetSPOutput(viewer, &spoutput));
474 
475   /* Create the dataspace for the dataset.
476    *
477    * dims - holds the current dimensions of the dataset
478    *
479    * maxDims - holds the maximum dimensions of the dataset (unlimited
480    * for the number of time steps with the current dimensions for the
481    * other dimensions; so only additional time steps can be added).
482    *
483    * chunkDims - holds the size of a single time step (required to
484    * permit extending dataset).
485    */
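  /*
     Illustrative example (not executed here): for a complex-valued vector with global size
     N = 12, block size bs = 3, timestepping enabled and timestep = 4, the code below produces

        dims      = {5, 4, 3, 2}               (steps stored so far, N/bs, bs, re/im)
        maxDims   = {H5S_UNLIMITED, 4, 3, 2}
        chunkDims = {1, 4, 3, 2}               (one time step per chunk)

     so only the time dimension can grow when later steps are appended.
  */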
486   dim       = 0;
487   chunksize = 1;
488   if (timestep >= 0) {
489     dims[dim]      = timestep + 1;
490     maxDims[dim]   = H5S_UNLIMITED;
491     chunkDims[dim] = 1;
492     ++dim;
493   }
494   PetscCall(PetscHDF5IntCast(xin->map->N / bs, dims + dim));
495 
496   maxDims[dim]   = dims[dim];
497   chunkDims[dim] = PetscMax(1, dims[dim]);
498   chunksize *= chunkDims[dim];
499   ++dim;
500   if (bs > 1 || dim2) {
501     dims[dim]      = bs;
502     maxDims[dim]   = dims[dim];
503     chunkDims[dim] = PetscMax(1, dims[dim]);
504     chunksize *= chunkDims[dim];
505     ++dim;
506   }
507   #if defined(PETSC_USE_COMPLEX)
508   dims[dim]      = 2;
509   maxDims[dim]   = dims[dim];
510   chunkDims[dim] = PetscMax(1, dims[dim]);
511   chunksize *= chunkDims[dim];
512   /* hdf5 chunks must be less than 4GB */
513   if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
514     if (bs > 1 || dim2) {
515       if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
516       if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
517     } else {
518       chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 128;
519     }
520   }
521   ++dim;
522   #else
523   /* hdf5 chunks must be less than 4GB */
524   if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
525     if (bs > 1 || dim2) {
526       if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
527       if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
528     } else {
529       chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 64;
530     }
531   }
532   #endif
533 
534   PetscCallHDF5Return(filespace, H5Screate_simple, ((int)dim, dims, maxDims));
535 
536   #if defined(PETSC_USE_REAL_SINGLE)
537   memscalartype  = H5T_NATIVE_FLOAT;
538   filescalartype = H5T_NATIVE_FLOAT;
539   #elif defined(PETSC_USE_REAL___FLOAT128)
540     #error "HDF5 output with 128 bit floats not supported."
541   #elif defined(PETSC_USE_REAL___FP16)
542     #error "HDF5 output with 16 bit floats not supported."
543   #else
544   memscalartype = H5T_NATIVE_DOUBLE;
545   if (spoutput == PETSC_TRUE) filescalartype = H5T_NATIVE_FLOAT;
546   else filescalartype = H5T_NATIVE_DOUBLE;
547   #endif
548 
549   /* Create the dataset with default properties and close filespace */
550   PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));
551   PetscCall(PetscStrlen(vecname, &len));
552   PetscCheck(len, PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Object must be named");
553   if (H5Lexists(group, vecname, H5P_DEFAULT) < 1) {
554     /* Create chunk */
555     PetscCallHDF5Return(chunkspace, H5Pcreate, (H5P_DATASET_CREATE));
556     PetscCallHDF5(H5Pset_chunk, (chunkspace, (int)dim, chunkDims));
557 
558     PetscCallHDF5Return(dset_id, H5Dcreate2, (group, vecname, filescalartype, filespace, H5P_DEFAULT, chunkspace, H5P_DEFAULT));
559     PetscCallHDF5(H5Pclose, (chunkspace));
560   } else {
561     PetscCallHDF5Return(dset_id, H5Dopen2, (group, vecname, H5P_DEFAULT));
562     PetscCallHDF5(H5Dset_extent, (dset_id, dims));
563   }
564   PetscCallHDF5(H5Sclose, (filespace));
565 
566   /* Each process defines a dataset and writes it to the hyperslab in the file */
567   dim = 0;
568   if (timestep >= 0) {
569     count[dim] = 1;
570     ++dim;
571   }
572   PetscCall(PetscHDF5IntCast(xin->map->n / bs, count + dim));
573   ++dim;
574   if (bs > 1 || dim2) {
575     count[dim] = bs;
576     ++dim;
577   }
578   #if defined(PETSC_USE_COMPLEX)
579   count[dim] = 2;
580   ++dim;
581   #endif
582   if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
583     PetscCallHDF5Return(memspace, H5Screate_simple, ((int)dim, count, NULL));
584   } else {
585     /* Can't create dataspace with zero for any dimension, so create null dataspace. */
586     PetscCallHDF5Return(memspace, H5Screate, (H5S_NULL));
587   }
588 
589   /* Select hyperslab in the file */
590   PetscCall(VecGetOwnershipRange(xin, &low, NULL));
591   dim = 0;
592   if (timestep >= 0) {
593     offset[dim] = timestep;
594     ++dim;
595   }
596   PetscCall(PetscHDF5IntCast(low / bs, offset + dim));
597   ++dim;
598   if (bs > 1 || dim2) {
599     offset[dim] = 0;
600     ++dim;
601   }
602   #if defined(PETSC_USE_COMPLEX)
603   offset[dim] = 0;
604   ++dim;
605   #endif
606   if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
607     PetscCallHDF5Return(filespace, H5Dget_space, (dset_id));
608     PetscCallHDF5(H5Sselect_hyperslab, (filespace, H5S_SELECT_SET, offset, NULL, count, NULL));
609   } else {
610     /* Create null filespace to match null memspace. */
611     PetscCallHDF5Return(filespace, H5Screate, (H5S_NULL));
612   }
613 
614   PetscCall(VecGetArrayRead(xin, &x));
615   PetscCallHDF5(H5Dwrite, (dset_id, memscalartype, memspace, filespace, hdf5->dxpl_id, x));
616   PetscCallHDF5(H5Fflush, (file_id, H5F_SCOPE_GLOBAL));
617   PetscCall(VecRestoreArrayRead(xin, &x));
618 
619   /* Close/release resources */
620   PetscCallHDF5(H5Gclose, (group));
621   PetscCallHDF5(H5Sclose, (filespace));
622   PetscCallHDF5(H5Sclose, (memspace));
623   PetscCallHDF5(H5Dclose, (dset_id));
624 
625   #if defined(PETSC_USE_COMPLEX)
626   {
627     PetscBool tru = PETSC_TRUE;
628     PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "complex", PETSC_BOOL, &tru));
629   }
630   #endif
631   if (timestepping) PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "timestepping", PETSC_BOOL, &timestepping));
632   PetscCall(PetscInfo(xin, "Wrote Vec object with name %s\n", vecname));
633   PetscFunctionReturn(PETSC_SUCCESS);
634 }
635 #endif
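/*
  Illustrative usage sketch (assumes PETSc was configured with HDF5): writing a named vector
  into "vec.h5" through the routine above. Error checking is elided.

     PetscViewer viewer;
     PetscCall(PetscObjectSetName((PetscObject)x, "solution"));
     PetscCall(PetscViewerHDF5Open(PETSC_COMM_WORLD, "vec.h5", FILE_MODE_WRITE, &viewer));
     PetscCall(VecView(x, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/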
636 
637 PetscErrorCode VecView_MPI(Vec xin, PetscViewer viewer)
638 {
639   PetscBool isascii, isbinary, isdraw;
640 #if defined(PETSC_HAVE_MATHEMATICA)
641   PetscBool ismathematica;
642 #endif
643 #if defined(PETSC_HAVE_HDF5)
644   PetscBool ishdf5;
645 #endif
646 #if defined(PETSC_HAVE_MATLAB)
647   PetscBool ismatlab;
648 #endif
649 #if defined(PETSC_HAVE_ADIOS)
650   PetscBool isadios;
651 #endif
652   PetscBool isglvis;
653 
654   PetscFunctionBegin;
655   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
656   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
657   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
658 #if defined(PETSC_HAVE_MATHEMATICA)
659   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATHEMATICA, &ismathematica));
660 #endif
661 #if defined(PETSC_HAVE_HDF5)
662   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
663 #endif
664 #if defined(PETSC_HAVE_MATLAB)
665   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATLAB, &ismatlab));
666 #endif
667   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERGLVIS, &isglvis));
668 #if defined(PETSC_HAVE_ADIOS)
669   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERADIOS, &isadios));
670 #endif
671   if (isascii) {
672     PetscCall(VecView_MPI_ASCII(xin, viewer));
673   } else if (isbinary) {
674     PetscCall(VecView_MPI_Binary(xin, viewer));
675   } else if (isdraw) {
676     PetscViewerFormat format;
677     PetscCall(PetscViewerGetFormat(viewer, &format));
678     if (format == PETSC_VIEWER_DRAW_LG) {
679       PetscCall(VecView_MPI_Draw_LG(xin, viewer));
680     } else {
681       PetscCall(VecView_MPI_Draw(xin, viewer));
682     }
683 #if defined(PETSC_HAVE_MATHEMATICA)
684   } else if (ismathematica) {
685     PetscCall(PetscViewerMathematicaPutVector(viewer, xin));
686 #endif
687 #if defined(PETSC_HAVE_HDF5)
688   } else if (ishdf5) {
689     PetscCall(VecView_MPI_HDF5(xin, viewer));
690 #endif
691 #if defined(PETSC_HAVE_ADIOS)
692   } else if (isadios) {
693     PetscCall(VecView_MPI_ADIOS(xin, viewer));
694 #endif
695 #if defined(PETSC_HAVE_MATLAB)
696   } else if (ismatlab) {
697     PetscCall(VecView_MPI_Matlab(xin, viewer));
698 #endif
699   } else if (isglvis) PetscCall(VecView_GLVis(xin, viewer));
700   PetscFunctionReturn(PETSC_SUCCESS);
701 }
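/*
  The dispatcher above is normally reached through VecView() or the -vec_view runtime option.
  A few illustrative command-line forms (exact viewer availability depends on how PETSc was
  configured):

     -vec_view                     ASCII on stdout
     -vec_view ::ascii_matlab      ASCII in MATLAB format
     -vec_view draw                plot in a graphics window
     -vec_view binary:sol.dat      PETSc binary file
*/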
702 
703 PetscErrorCode VecGetSize_MPI(Vec xin, PetscInt *N)
704 {
705   PetscFunctionBegin;
706   *N = xin->map->N;
707   PetscFunctionReturn(PETSC_SUCCESS);
708 }
709 
710 PetscErrorCode VecGetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], PetscScalar y[])
711 {
712   const PetscScalar *xx;
713   const PetscInt     start = xin->map->range[xin->stash.rank];
714 
715   PetscFunctionBegin;
716   PetscCall(VecGetArrayRead(xin, &xx));
717   for (PetscInt i = 0; i < ni; i++) {
718     if (xin->stash.ignorenegidx && ix[i] < 0) continue;
719     const PetscInt tmp = ix[i] - start;
720 
721     PetscCheck(tmp >= 0 && tmp < xin->map->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Can only get local values, trying %" PetscInt_FMT, ix[i]);
722     y[i] = xx[tmp];
723   }
724   PetscCall(VecRestoreArrayRead(xin, &xx));
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
727 
728 PetscErrorCode VecSetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar y[], InsertMode addv)
729 {
730   const PetscBool   ignorenegidx = xin->stash.ignorenegidx;
731   const PetscBool   donotstash   = xin->stash.donotstash;
732   const PetscMPIInt rank         = xin->stash.rank;
733   const PetscInt   *owners       = xin->map->range;
734   const PetscInt    start = owners[rank], end = owners[rank + 1];
735   PetscScalar      *xx;
736 
737   PetscFunctionBegin;
738   if (PetscDefined(USE_DEBUG)) {
739     PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
740     PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
741   }
742   PetscCall(VecGetArray(xin, &xx));
743   xin->stash.insertmode = addv;
744   for (PetscInt i = 0; i < ni; ++i) {
745     PetscInt    row;
746     PetscScalar yv = y ? y[i] : 0;
747 
748     if (ignorenegidx && ix[i] < 0) continue;
749     PetscCheck(ix[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT " cannot be negative", ix[i]);
750     if ((row = ix[i]) >= start && row < end) {
751       if (addv == INSERT_VALUES) {
752         xx[row - start] = yv;
753       } else {
754         xx[row - start] += yv;
755       }
756     } else if (!donotstash) {
757       PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
758       PetscCall(VecStashValue_Private(&xin->stash, row, yv));
759     }
760   }
761   PetscCall(VecRestoreArray(xin, &xx));
762   PetscFunctionReturn(PETSC_SUCCESS);
763 }
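/*
  Illustrative caller-side sketch (standard PETSc API, error checking elided): entries whose
  global index falls outside this rank's ownership range are stashed by the routine above and
  only moved to their owner during assembly.

     PetscCall(VecSetValues(x, 1, &globalrow, &value, ADD_VALUES));  // may target another rank
     PetscCall(VecAssemblyBegin(x));                                 // communicates stashed values
     PetscCall(VecAssemblyEnd(x));
*/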
764 
765 PetscErrorCode VecSetValuesBlocked_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar yin[], InsertMode addv)
766 {
767   PetscMPIInt  rank   = xin->stash.rank;
768   PetscInt    *owners = xin->map->range, start = owners[rank];
769   PetscInt     end = owners[rank + 1], i, row, bs = xin->map->bs, j;
770   PetscScalar *xx, *y = (PetscScalar *)yin;
771 
772   PetscFunctionBegin;
773   PetscCall(VecGetArray(xin, &xx));
774   if (PetscDefined(USE_DEBUG)) {
775     PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
776     PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
777   }
778   xin->stash.insertmode = addv;
779 
780   if (addv == INSERT_VALUES) {
781     for (i = 0; i < ni; i++) {
782       if ((row = bs * ix[i]) >= start && row < end) {
783         for (j = 0; j < bs; j++) xx[row - start + j] = y ? y[j] : 0.0;
784       } else if (!xin->stash.donotstash) {
785         if (ix[i] < 0) {
786           if (y) y += bs;
787           continue;
788         }
789         PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
790         PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
791       }
792       if (y) y += bs;
793     }
794   } else {
795     for (i = 0; i < ni; i++) {
796       if ((row = bs * ix[i]) >= start && row < end) {
797         for (j = 0; j < bs; j++) xx[row - start + j] += y ? y[j] : 0.0;
798       } else if (!xin->stash.donotstash) {
799         if (ix[i] < 0) {
800           if (y) y += bs;
801           continue;
802         }
803         PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
804         PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
805       }
806       if (y) y += bs;
807     }
808   }
809   PetscCall(VecRestoreArray(xin, &xx));
810   PetscFunctionReturn(PETSC_SUCCESS);
811 }
812 
813 /*
814    Since nsends or nreceives may be zero we add 1 in certain mallocs
815    to make sure we never malloc an empty one.
816 */
817 PetscErrorCode VecAssemblyBegin_MPI(Vec xin)
818 {
819   PetscInt   *owners = xin->map->range, *bowners, i, bs, nstash, reallocs;
820   PetscMPIInt size;
821   InsertMode  addv;
822   MPI_Comm    comm;
823 
824   PetscFunctionBegin;
825   PetscCall(PetscObjectGetComm((PetscObject)xin, &comm));
826   if (xin->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);
827 
828   PetscCallMPI(MPIU_Allreduce((PetscEnum *)&xin->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
829   PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
830   xin->stash.insertmode  = addv; /* in case this processor had no cache */
831   xin->bstash.insertmode = addv; /* Block stash implicitly tracks InsertMode of scalar stash */
832 
833   PetscCall(VecGetBlockSize(xin, &bs));
834   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
835   if (!xin->bstash.bowners && xin->map->bs != -1) {
836     PetscCall(PetscMalloc1(size + 1, &bowners));
837     for (i = 0; i < size + 1; i++) bowners[i] = owners[i] / bs;
838     xin->bstash.bowners = bowners;
839   } else bowners = xin->bstash.bowners;
840 
841   PetscCall(VecStashScatterBegin_Private(&xin->stash, owners));
842   PetscCall(VecStashScatterBegin_Private(&xin->bstash, bowners));
843   PetscCall(VecStashGetInfo_Private(&xin->stash, &nstash, &reallocs));
844   PetscCall(PetscInfo(xin, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
845   PetscCall(VecStashGetInfo_Private(&xin->bstash, &nstash, &reallocs));
846   PetscCall(PetscInfo(xin, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
850 PetscErrorCode VecAssemblyEnd_MPI(Vec vec)
851 {
852   PetscInt     base, i, j, *row, flg, bs;
853   PetscMPIInt  n;
854   PetscScalar *val, *vv, *array, *xarray;
855 
856   PetscFunctionBegin;
857   if (!vec->stash.donotstash) {
858     PetscCall(VecGetArray(vec, &xarray));
859     PetscCall(VecGetBlockSize(vec, &bs));
860     base = vec->map->range[vec->stash.rank];
861 
862     /* Process the stash */
863     while (1) {
864       PetscCall(VecStashScatterGetMesg_Private(&vec->stash, &n, &row, &val, &flg));
865       if (!flg) break;
866       if (vec->stash.insertmode == ADD_VALUES) {
867         for (i = 0; i < n; i++) xarray[row[i] - base] += val[i];
868       } else if (vec->stash.insertmode == INSERT_VALUES) {
869         for (i = 0; i < n; i++) xarray[row[i] - base] = val[i];
870       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
871     }
872     PetscCall(VecStashScatterEnd_Private(&vec->stash));
873 
874     /* now process the block-stash */
875     while (1) {
876       PetscCall(VecStashScatterGetMesg_Private(&vec->bstash, &n, &row, &val, &flg));
877       if (!flg) break;
878       for (i = 0; i < n; i++) {
879         array = xarray + row[i] * bs - base;
880         vv    = val + i * bs;
881         if (vec->stash.insertmode == ADD_VALUES) {
882           for (j = 0; j < bs; j++) array[j] += vv[j];
883         } else if (vec->stash.insertmode == INSERT_VALUES) {
884           for (j = 0; j < bs; j++) array[j] = vv[j];
885         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
886       }
887     }
888     PetscCall(VecStashScatterEnd_Private(&vec->bstash));
889     PetscCall(VecRestoreArray(vec, &xarray));
890   }
891   vec->stash.insertmode = NOT_SET_VALUES;
892   PetscFunctionReturn(PETSC_SUCCESS);
893 }
894 
895 PetscErrorCode VecSetPreallocationCOO_MPI(Vec x, PetscCount coo_n, const PetscInt coo_i[])
896 {
897   PetscInt    m, M, rstart, rend;
898   Vec_MPI    *vmpi = (Vec_MPI *)x->data;
899   PetscCount  k, p, q, rem; /* Loop variables over coo arrays */
900   PetscMPIInt size;
901   MPI_Comm    comm;
902 
903   PetscFunctionBegin;
904   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
905   PetscCallMPI(MPI_Comm_size(comm, &size));
906   PetscCall(VecResetPreallocationCOO_MPI(x));
907 
908   PetscCall(PetscLayoutSetUp(x->map));
909   PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
910   PetscCall(VecGetLocalSize(x, &m));
911   PetscCall(VecGetSize(x, &M));
912 
913   /* Sort COOs along with a permutation array, so that negative indices come    */
914   /* first, then local ones, then remote ones.                                  */
915   PetscCount n1 = coo_n, nneg, *perm;
916   PetscInt  *i1; /* Copy of input COOs along with a permutation array */
917   PetscCall(PetscMalloc1(n1, &i1));
918   PetscCall(PetscMalloc1(n1, &perm));
919   PetscCall(PetscArraycpy(i1, coo_i, n1)); /* Make a copy since we'll modify it */
920   for (k = 0; k < n1; k++) perm[k] = k;
921 
922   /* Manipulate i1[] so that entries with negative indices will have the smallest
923      index, local entries will have greater but negative indices, and remote entries
924      will have positive indices.
925   */
926   for (k = 0; k < n1; k++) {
927     if (i1[k] < 0) {
928       if (x->stash.ignorenegidx) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */
929       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found a negative index in VecSetPreallocationCOO() but VEC_IGNORE_NEGATIVE_INDICES was not set");
930     } else if (i1[k] >= rstart && i1[k] < rend) {
931       i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
932     } else {
933       PetscCheck(i1[k] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found index %" PetscInt_FMT " in VecSetPreallocationCOO() larger than the global size %" PetscInt_FMT, i1[k], M);
934       if (x->stash.donotstash) i1[k] = PETSC_INT_MIN; /* Ignore off-proc indices as if they were negative */
935     }
936   }
937 
938   /* Sort the indices, after that, [0,nneg) have ignored entries, [nneg,rem) have local entries and [rem,n1) have remote entries */
939   PetscCall(PetscSortIntWithCountArray(n1, i1, perm));
940   for (k = 0; k < n1; k++) {
941     if (i1[k] > PETSC_INT_MIN) break;
942   } /* Advance k to the first entry we need to take care of */
943   nneg = k;
944   PetscCall(PetscSortedIntUpperBound(i1, nneg, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
945   for (k = nneg; k < rem; k++) i1[k] += PETSC_INT_MAX;                               /* Revert indices of local entries */
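  /*
     Worked example (illustrative only): assume this rank owns rows [rstart,rend) = [4,8),
     VEC_IGNORE_NEGATIVE_INDICES is set, and coo_i = {9, 5, -1, 4, 12}. The shift above maps this
     to {9, 5-PETSC_INT_MAX, PETSC_INT_MIN, 4-PETSC_INT_MAX, 12}; after sorting,
     i1 = {PETSC_INT_MIN, 4-PETSC_INT_MAX, 5-PETSC_INT_MAX, 9, 12} with perm = {2, 3, 1, 0, 4},
     giving nneg = 1 and rem = 3. Reverting the local indices yields i1 = {PETSC_INT_MIN, 4, 5, 9, 12}:
     [0,nneg) are ignored entries, [nneg,rem) are local, and [rem,n1) are remote.
  */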
946 
947   /*           Build stuff for local entries                                    */
948   PetscCount tot1, *jmap1, *perm1;
949   PetscCall(PetscCalloc1(m + 1, &jmap1));
950   for (k = nneg; k < rem; k++) jmap1[i1[k] - rstart + 1]++; /* Count repeats of each local entry */
951   for (k = 0; k < m; k++) jmap1[k + 1] += jmap1[k];         /* Transform jmap1[] to CSR-like data structure */
952   tot1 = jmap1[m];
953   PetscAssert(tot1 == rem - nneg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected errors in VecSetPreallocationCOO_MPI");
954   PetscCall(PetscMalloc1(tot1, &perm1));
955   PetscCall(PetscArraycpy(perm1, perm + nneg, tot1));
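  /*
     Continuing the example above: the local entries are rows {4, 5} (local indices {0, 1}) with
     original positions perm = {3, 1}. With m = 4 this yields jmap1 = {0, 1, 2, 2, 2} and
     perm1 = {3, 1}, i.e. the repeats of local row r are v[perm1[jmap1[r]]], ..., v[perm1[jmap1[r+1]-1]]
     when values are later supplied through VecSetValuesCOO().
  */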
956 
957   /*        Record the permutation array for filling the send buffer            */
958   PetscCount *Cperm;
959   PetscCall(PetscMalloc1(n1 - rem, &Cperm));
960   PetscCall(PetscArraycpy(Cperm, perm + rem, n1 - rem));
961   PetscCall(PetscFree(perm));
962 
963   /*           Send remote entries to their owner                                  */
964   /* Find which entries should be sent to which remote ranks */
965   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
966   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
967   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
968   const PetscInt *ranges;
969   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
970 
971   PetscCall(PetscLayoutGetRanges(x->map, &ranges));
972   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
973   for (k = rem; k < n1;) {
974     PetscMPIInt owner;
975     PetscInt    firstRow, lastRow;
976 
977     /* Locate a row range */
978     firstRow = i1[k]; /* first row of this owner */
979     PetscCall(PetscLayoutFindOwner(x->map, firstRow, &owner));
980     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
981 
982     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
983     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
984 
985     /* All entries in [k,p) belong to this remote owner */
986     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
987       PetscMPIInt *sendto2;
988       PetscInt    *nentries2;
989       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
990 
991       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
992       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
993       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
994       PetscCall(PetscFree2(sendto, nentries));
995       sendto   = sendto2;
996       nentries = nentries2;
997       maxNsend = maxNsend2;
998     }
999     sendto[nsend] = owner;
1000     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
1001     nsend++;
1002     k = p;
1003   }
1004 
1005   /* Build 1st SF to know offsets on remote to send data */
1006   PetscSF      sf1;
1007   PetscInt     nroots = 1, nroots2 = 0;
1008   PetscInt     nleaves = nsend, nleaves2 = 0;
1009   PetscInt    *offsets;
1010   PetscSFNode *iremote;
1011 
1012   PetscCall(PetscSFCreate(comm, &sf1));
1013   PetscCall(PetscMalloc1(nsend, &iremote));
1014   PetscCall(PetscMalloc1(nsend, &offsets));
1015   for (k = 0; k < nsend; k++) {
1016     iremote[k].rank  = sendto[k];
1017     iremote[k].index = 0;
1018     nleaves2 += nentries[k];
1019     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
1020   }
1021   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
1022   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
1023   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 overflowed, the check on offsets[] below would catch it */
1024   PetscCall(PetscSFDestroy(&sf1));
1025   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
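  /*
     Illustrative example: if ranks 1 and 2 send 3 and 5 entries respectively to rank 0, the
     fetch-and-add above leaves nroots2 = 8 on rank 0 and hands the two senders disjoint offsets
     (e.g. 0 and 3, or 5 and 0, depending on arrival order), so each sender learns where its block
     starts in the owner's receive buffer without an explicit counting round.
  */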
1026 
1027   /* Build 2nd SF to send remote COOs to their owner */
1028   PetscSF sf2;
1029   nroots  = nroots2;
1030   nleaves = nleaves2;
1031   PetscCall(PetscSFCreate(comm, &sf2));
1032   PetscCall(PetscSFSetFromOptions(sf2));
1033   PetscCall(PetscMalloc1(nleaves, &iremote));
1034   p = 0;
1035   for (k = 0; k < nsend; k++) {
1036     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
1037     for (q = 0; q < nentries[k]; q++, p++) {
1038       iremote[p].rank = sendto[k];
1039       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
1040     }
1041   }
1042   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
1043 
1044   /* Send the remote COOs to their owner */
1045   PetscInt    n2 = nroots, *i2; /* Buffers for received COOs from other ranks, along with a permutation array */
1046   PetscCount *perm2;
1047   PetscCall(PetscMalloc1(n2, &i2));
1048   PetscCall(PetscMalloc1(n2, &perm2));
1049   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
1050   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
1051 
1052   PetscCall(PetscFree(i1));
1053   PetscCall(PetscFree(offsets));
1054   PetscCall(PetscFree2(sendto, nentries));
1055 
1056   /* Sort received COOs along with a permutation array            */
1057   PetscCount  *imap2;
1058   PetscCount  *jmap2, nnz2;
1059   PetscScalar *sendbuf, *recvbuf;
1060   PetscInt     old;
1061   PetscCount   sendlen = n1 - rem, recvlen = n2;
1062 
1063   for (k = 0; k < n2; k++) perm2[k] = k;
1064   PetscCall(PetscSortIntWithCountArray(n2, i2, perm2));
1065 
1066   /* nnz2 will be # of unique entries in the recvbuf */
1067   nnz2 = n2;
1068   for (k = 1; k < n2; k++) {
1069     if (i2[k] == i2[k - 1]) nnz2--;
1070   }
1071 
1072   /* Build imap2[] and jmap2[] for each unique entry */
1073   PetscCall(PetscMalloc4(nnz2, &imap2, nnz2 + 1, &jmap2, sendlen, &sendbuf, recvlen, &recvbuf));
1074   p        = -1;
1075   old      = -1;
1076   jmap2[0] = 0;
1077   jmap2++;
1078   for (k = 0; k < n2; k++) {
1079     if (i2[k] != old) { /* Meet a new entry */
1080       p++;
1081       imap2[p] = i2[k] - rstart;
1082       jmap2[p] = 1;
1083       old      = i2[k];
1084     } else {
1085       jmap2[p]++;
1086     }
1087   }
1088   jmap2--;
1089   for (k = 0; k < nnz2; k++) jmap2[k + 1] += jmap2[k];
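  /*
     Example (illustrative): if the sorted receive buffer holds i2 = {4, 4, 6} with rstart = 4,
     the loop above produces nnz2 = 2, imap2 = {0, 2} (local rows) and, after the prefix sum,
     jmap2 = {0, 2, 3}: the first unique row has two incoming contributions and the second has one.
  */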
1090 
1091   PetscCall(PetscFree(i2));
1092 
1093   vmpi->coo_n = coo_n;
1094   vmpi->tot1  = tot1;
1095   vmpi->jmap1 = jmap1;
1096   vmpi->perm1 = perm1;
1097   vmpi->nnz2  = nnz2;
1098   vmpi->imap2 = imap2;
1099   vmpi->jmap2 = jmap2;
1100   vmpi->perm2 = perm2;
1101 
1102   vmpi->Cperm   = Cperm;
1103   vmpi->sendbuf = sendbuf;
1104   vmpi->recvbuf = recvbuf;
1105   vmpi->sendlen = sendlen;
1106   vmpi->recvlen = recvlen;
1107   vmpi->coo_sf  = sf2;
1108   PetscFunctionReturn(PETSC_SUCCESS);
1109 }
1110 
1111 PetscErrorCode VecSetValuesCOO_MPI(Vec x, const PetscScalar v[], InsertMode imode)
1112 {
1113   Vec_MPI          *vmpi = (Vec_MPI *)x->data;
1114   PetscInt          m;
1115   PetscScalar      *a, *sendbuf = vmpi->sendbuf, *recvbuf = vmpi->recvbuf;
1116   const PetscCount *jmap1 = vmpi->jmap1;
1117   const PetscCount *perm1 = vmpi->perm1;
1118   const PetscCount *imap2 = vmpi->imap2;
1119   const PetscCount *jmap2 = vmpi->jmap2;
1120   const PetscCount *perm2 = vmpi->perm2;
1121   const PetscCount *Cperm = vmpi->Cperm;
1122   const PetscCount  nnz2  = vmpi->nnz2;
1123 
1124   PetscFunctionBegin;
1125   PetscCall(VecGetLocalSize(x, &m));
1126   PetscCall(VecGetArray(x, &a));
1127 
1128   /* Pack entries to be sent to remote */
1129   for (PetscInt i = 0; i < vmpi->sendlen; i++) sendbuf[i] = v[Cperm[i]];
1130 
1131   /* Send remote entries to their owner and overlap the communication with local computation */
1132   PetscCall(PetscSFReduceWithMemTypeBegin(vmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
1133   /* Add local entries to the vector */
1134   for (PetscInt i = 0; i < m; i++) { /* All entries in a[] are either zero'ed or added with a value (i.e., initialized) */
1135     PetscScalar sum = 0.0;           /* Do partial summation first to improve numerical stability */
1136     for (PetscCount k = jmap1[i]; k < jmap1[i + 1]; k++) sum += v[perm1[k]];
1137     a[i] = (imode == INSERT_VALUES ? 0.0 : a[i]) + sum;
1138   }
1139   PetscCall(PetscSFReduceEnd(vmpi->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
1140 
1141   /* Add received remote entries to the vector */
1142   for (PetscInt i = 0; i < nnz2; i++) {
1143     for (PetscCount k = jmap2[i]; k < jmap2[i + 1]; k++) a[imap2[i]] += recvbuf[perm2[k]];
1144   }
1145 
1146   PetscCall(VecRestoreArray(x, &a));
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
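/*
  Illustrative user-level sketch (standard PETSc COO API, error checking elided): the two routines
  above implement VecSetPreallocationCOO()/VecSetValuesCOO() for MPI vectors.

     PetscInt    idx[] = {0, 7, 7, 3};                  // global indices; repeats and off-rank entries allowed
     PetscScalar val[] = {1.0, 2.0, 3.0, 4.0};
     PetscCall(VecSetPreallocationCOO(x, 4, idx));      // analyze/communicate the index pattern once
     PetscCall(VecSetValuesCOO(x, val, ADD_VALUES));    // can be repeated with new values
*/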
1149