xref: /petsc/src/mat/impls/elemental/matelem.cxx (revision 98d129c30f3ee9fdddc40fdbc5a989b7be64f888)
1 #include <petsc/private/petscelemental.h>
2 
3 const char       ElementalCitation[] = "@Article{Elemental2012,\n"
4                                        "  author  = {Jack Poulson and Bryan Marker and Jeff R. Hammond and Nichols A. Romero and Robert {v}an~{d}e~{G}eijn},\n"
5                                        "  title   = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations},\n"
6                                        "  journal = {{ACM} Transactions on Mathematical Software},\n"
7                                        "  volume  = {39},\n"
8                                        "  number  = {2},\n"
9                                        "  year    = {2013}\n"
10                                        "}\n";
11 static PetscBool ElementalCite       = PETSC_FALSE;
12 
13 /*
14     The variable Petsc_Elemental_keyval is used to indicate an MPI attribute that
15   is attached to a communicator, in this case the attribute is a Mat_Elemental_Grid
16 */
17 static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;
18 
19 static PetscErrorCode MatView_Elemental(Mat A, PetscViewer viewer)
20 {
21   Mat_Elemental *a = (Mat_Elemental *)A->data;
22   PetscBool      iascii;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
26   if (iascii) {
27     PetscViewerFormat format;
28     PetscCall(PetscViewerGetFormat(viewer, &format));
29     if (format == PETSC_VIEWER_ASCII_INFO) {
30       /* call elemental viewing function */
31       PetscCall(PetscViewerASCIIPrintf(viewer, "Elemental run parameters:\n"));
32       PetscCall(PetscViewerASCIIPrintf(viewer, "  allocated entries=%zu\n", (*a->emat).AllocatedMemory()));
33       PetscCall(PetscViewerASCIIPrintf(viewer, "  grid height=%d, grid width=%d\n", (*a->emat).Grid().Height(), (*a->emat).Grid().Width()));
34       if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
35         /* call elemental viewing function */
36         PetscCall(PetscPrintf(PetscObjectComm((PetscObject)viewer), "test matview_elemental 2\n"));
37       }
38 
39     } else if (format == PETSC_VIEWER_DEFAULT) {
40       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
41       El::Print(*a->emat, "Elemental matrix (cyclic ordering)");
42       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
43       if (A->factortype == MAT_FACTOR_NONE) {
44         Mat Adense;
45         PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
46         PetscCall(MatView(Adense, viewer));
47         PetscCall(MatDestroy(&Adense));
48       }
49     } else SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Format");
50   } else {
51     /* convert to dense format and call MatView() */
52     Mat Adense;
53     PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
54     PetscCall(MatView(Adense, viewer));
55     PetscCall(MatDestroy(&Adense));
56   }
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
/*
  MatGetInfo_Elemental - Fill a MatInfo record for a MATELEMENTAL matrix.

  MAT_LOCAL and MAT_GLOBAL_SUM both report the locally allocated entry count
  from Elemental; MAT_GLOBAL_SUM is NOT reduced across ranks here (see the
  commented-out MPIU_Allreduce below).  NOTE(review): the MAT_GLOBAL_MAX branch
  currently sets nothing, so nz_allocated/nz_used retain whatever the caller
  passed in for that flag — confirm whether callers zero-initialize MatInfo.
*/
static PetscErrorCode MatGetInfo_Elemental(Mat A, MatInfoType flag, MatInfo *info)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;

  PetscFunctionBegin;
  info->block_size = 1.0; /* dense storage: no blocking */

  if (flag == MAT_LOCAL) {
    info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
    info->nz_used      = info->nz_allocated;
  } else if (flag == MAT_GLOBAL_MAX) {
    //PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin)));
    /* see MatGetInfo_MPIAIJ() for getting global info->nz_allocated! */
    //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_MAX not written yet");
  } else if (flag == MAT_GLOBAL_SUM) {
    //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_SUM not written yet");
    info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
    info->nz_used      = info->nz_allocated;           /* assume Elemental does accurate allocation */
    //PetscCall(MPIU_Allreduce(isend,irecv,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
    //PetscPrintf(PETSC_COMM_SELF,"    ... [%d] locally allocated %g\n",rank,info->nz_allocated);
  }

  /* dense matrices: no preallocation bookkeeping, fill ratios are Elemental's concern */
  info->nz_unneeded       = 0.0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = 0; /* REVIEW ME */
  info->fill_ratio_given  = 0; /* determined by Elemental */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
91 
92 static PetscErrorCode MatSetOption_Elemental(Mat A, MatOption op, PetscBool flg)
93 {
94   Mat_Elemental *a = (Mat_Elemental *)A->data;
95 
96   PetscFunctionBegin;
97   switch (op) {
98   case MAT_NEW_NONZERO_LOCATIONS:
99   case MAT_NEW_NONZERO_LOCATION_ERR:
100   case MAT_NEW_NONZERO_ALLOCATION_ERR:
101   case MAT_SYMMETRIC:
102   case MAT_SORTED_FULL:
103   case MAT_HERMITIAN:
104     break;
105   case MAT_ROW_ORIENTED:
106     a->roworiented = flg;
107     break;
108   default:
109     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
110   }
111   PetscFunctionReturn(PETSC_SUCCESS);
112 }
113 
/*
  MatSetValues_Elemental - Insert or add a logically dense block of values.

  PETSc global indices are mapped to Elemental's cyclic ordering via
  P2RO()/RO2E().  Entries local to this rank are applied immediately with
  Set()/Update(); off-process entries are only legal with ADD_VALUES and are
  counted in a first pass, reserved, then queued in a second pass
  (presumably flushed by Elemental's queue processing at assembly time —
  confirm against the assembly routines, which are outside this chunk).
  Negative row/column indices are skipped, matching MatSetValues() semantics.
  vals is read row-major when a->roworiented, column-major otherwise.
*/
static PetscErrorCode MatSetValues_Elemental(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  PetscInt       i, j, rrank, ridx, crank, cidx, erow, ecol, numQueues = 0;

  PetscFunctionBegin;
  // TODO: Initialize matrix to all zeros?

  // Count the number of queues from this process
  if (a->roworiented) {
    for (i = 0; i < nr; i++) {
      if (rows[i] < 0) continue;
      P2RO(A, 0, rows[i], &rrank, &ridx);
      RO2E(A, 0, rrank, ridx, &erow);
      PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
      for (j = 0; j < nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
        if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES:
          a->emat->Set(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
          break;
        case ADD_VALUES:
          a->emat->Update(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
          break;
        default:
          SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
        }
      }
    }

    /* second pass: queue exactly the off-process updates counted above */
    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve(numQueues);
    for (i = 0; i < nr; i++) {
      if (rows[i] < 0) continue;
      P2RO(A, 0, rows[i], &rrank, &ridx);
      RO2E(A, 0, rrank, ridx, &erow);
      for (j = 0; j < nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate(erow, ecol, vals[i * nc + j]);
        }
      }
    }
  } else { /* column-oriented: same logic with loops swapped and vals indexed column-major */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);
      PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
      for (i = 0; i < nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A, 0, rows[i], &rrank, &ridx);
        RO2E(A, 0, rrank, ridx, &erow);
        PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
        if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES:
          a->emat->Set(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
          break;
        case ADD_VALUES:
          a->emat->Update(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
          break;
        default:
          SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
        }
      }
    }

    /* second pass: queue exactly the off-process updates counted above */
    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve(numQueues);
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);

      for (i = 0; i < nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A, 0, rows[i], &rrank, &ridx);
        RO2E(A, 0, rrank, ridx, &erow);
        if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate(erow, ecol, vals[i + j * nr]);
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
221 
/*
  MatMult_Elemental - Y = A*X.

  The PETSc vector arrays are wrapped zero-copy as Elemental [VC,STAR]
  distributed column vectors and the product is computed with El::Gemv().
  The PetscScalar <-> PetscElemScalar pointer casts assume the two types are
  layout-compatible (established by the petscelemental.h configuration).
*/
static PetscErrorCode MatMult_Elemental(Mat A, Vec X, Vec Y)
{
  Mat_Elemental         *a = (Mat_Elemental *)A->data;
  const PetscElemScalar *x;
  PetscElemScalar       *y;
  PetscElemScalar        one = 1, zero = 0;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecGetArray(Y, (PetscScalar **)&y));
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
    xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n); /* read-only view of X */
    ye.Attach(A->rmap->N, 1, *a->grid, 0, 0, y, A->rmap->n);       /* writable view of Y */
    El::Gemv(El::NORMAL, one, *a->emat, xe, zero, ye);             /* ye = 1*A*xe + 0*ye */
  }
  PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
242 
/*
  MatMultTranspose_Elemental - Y = A^T * X.

  Mirror of MatMult_Elemental() with row/column maps swapped (X has the row
  layout of A, Y the column layout) and El::TRANSPOSE passed to Gemv().
*/
static PetscErrorCode MatMultTranspose_Elemental(Mat A, Vec X, Vec Y)
{
  Mat_Elemental         *a = (Mat_Elemental *)A->data;
  const PetscElemScalar *x;
  PetscElemScalar       *y;
  PetscElemScalar        one = 1, zero = 0;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecGetArray(Y, (PetscScalar **)&y));
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
    xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n); /* read-only view of X */
    ye.Attach(A->cmap->N, 1, *a->grid, 0, 0, y, A->cmap->n);       /* writable view of Y */
    El::Gemv(El::TRANSPOSE, one, *a->emat, xe, zero, ye);          /* ye = 1*A^T*xe + 0*ye */
  }
  PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
  PetscFunctionReturn(PETSC_SUCCESS);
}
263 
/*
  MatMultAdd_Elemental - Z = A*X + Y.

  Y is first copied into Z (unless they alias), then Gemv() accumulates with
  beta = 1 so the product is added onto the copied values.
*/
static PetscErrorCode MatMultAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
{
  Mat_Elemental         *a = (Mat_Elemental *)A->data;
  const PetscElemScalar *x;
  PetscElemScalar       *z;
  PetscElemScalar        one = 1;

  PetscFunctionBegin;
  if (Y != Z) PetscCall(VecCopy(Y, Z)); /* seed Z with Y so beta=1 accumulates correctly */
  PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecGetArray(Z, (PetscScalar **)&z));
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
    xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
    ze.Attach(A->rmap->N, 1, *a->grid, 0, 0, z, A->rmap->n);
    El::Gemv(El::NORMAL, one, *a->emat, xe, one, ze); /* ze = 1*A*xe + 1*ze */
  }
  PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
285 
/*
  MatMultTransposeAdd_Elemental - Z = A^T*X + Y.

  Mirror of MatMultAdd_Elemental() with the row/column layouts swapped and
  El::TRANSPOSE passed to Gemv().
*/
static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
{
  Mat_Elemental         *a = (Mat_Elemental *)A->data;
  const PetscElemScalar *x;
  PetscElemScalar       *z;
  PetscElemScalar        one = 1;

  PetscFunctionBegin;
  if (Y != Z) PetscCall(VecCopy(Y, Z)); /* seed Z with Y so beta=1 accumulates correctly */
  PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecGetArray(Z, (PetscScalar **)&z));
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
    xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
    ze.Attach(A->cmap->N, 1, *a->grid, 0, 0, z, A->cmap->n);
    El::Gemv(El::TRANSPOSE, one, *a->emat, xe, one, ze); /* ze = 1*A^T*xe + 1*ze */
  }
  PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
307 
308 PetscErrorCode MatMatMultNumeric_Elemental(Mat A, Mat B, Mat C)
309 {
310   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
311   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
312   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
313   PetscElemScalar one = 1, zero = 0;
314 
315   PetscFunctionBegin;
316   { /* Scoping so that constructor is called before pointer is returned */
317     El::Gemm(El::NORMAL, El::NORMAL, one, *a->emat, *b->emat, zero, *c->emat);
318   }
319   C->assembled = PETSC_TRUE;
320   PetscFunctionReturn(PETSC_SUCCESS);
321 }
322 
323 PetscErrorCode MatMatMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat Ce)
324 {
325   PetscFunctionBegin;
326   PetscCall(MatSetSizes(Ce, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
327   PetscCall(MatSetType(Ce, MATELEMENTAL));
328   PetscCall(MatSetUp(Ce));
329   Ce->ops->matmultnumeric = MatMatMultNumeric_Elemental;
330   PetscFunctionReturn(PETSC_SUCCESS);
331 }
332 
333 static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A, Mat B, Mat C)
334 {
335   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
336   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
337   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
338   PetscElemScalar one = 1, zero = 0;
339 
340   PetscFunctionBegin;
341   { /* Scoping so that constructor is called before pointer is returned */
342     El::Gemm(El::NORMAL, El::TRANSPOSE, one, *a->emat, *b->emat, zero, *c->emat);
343   }
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(PETSC_SUCCESS);
346 }
347 
348 static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATELEMENTAL));
353   PetscCall(MatSetUp(C));
354   PetscFunctionReturn(PETSC_SUCCESS);
355 }
356 
357 static PetscErrorCode MatProductSetFromOptions_Elemental_AB(Mat C)
358 {
359   PetscFunctionBegin;
360   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental;
361   C->ops->productsymbolic = MatProductSymbolic_AB;
362   PetscFunctionReturn(PETSC_SUCCESS);
363 }
364 
365 static PetscErrorCode MatProductSetFromOptions_Elemental_ABt(Mat C)
366 {
367   PetscFunctionBegin;
368   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_Elemental;
369   C->ops->productsymbolic          = MatProductSymbolic_ABt;
370   PetscFunctionReturn(PETSC_SUCCESS);
371 }
372 
373 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Elemental(Mat C)
374 {
375   Mat_Product *product = C->product;
376 
377   PetscFunctionBegin;
378   switch (product->type) {
379   case MATPRODUCT_AB:
380     PetscCall(MatProductSetFromOptions_Elemental_AB(C));
381     break;
382   case MATPRODUCT_ABt:
383     PetscCall(MatProductSetFromOptions_Elemental_ABt(C));
384     break;
385   default:
386     break;
387   }
388   PetscFunctionReturn(PETSC_SUCCESS);
389 }
390 
/*
  MatMatMultNumeric_Elemental_MPIDense - C = A*B where A is MATELEMENTAL and
  B is MPIDENSE: convert B to Elemental, multiply there, then convert the
  result back into the preallocated MPIDENSE matrix C (MAT_REUSE_MATRIX).
  The temporaries Be and Ce are destroyed before returning.
*/
static PetscErrorCode MatMatMultNumeric_Elemental_MPIDense(Mat A, Mat B, Mat C)
{
  Mat Be, Ce;

  PetscFunctionBegin;
  PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &Be));
  PetscCall(MatMatMult(A, Be, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ce));
  PetscCall(MatConvert(Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C)); /* fill the caller-owned C in place */
  PetscCall(MatDestroy(&Be));
  PetscCall(MatDestroy(&Ce));
  PetscFunctionReturn(PETSC_SUCCESS);
}
403 
404 static PetscErrorCode MatMatMultSymbolic_Elemental_MPIDense(Mat A, Mat B, PetscReal, Mat C)
405 {
406   PetscFunctionBegin;
407   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
408   PetscCall(MatSetType(C, MATMPIDENSE));
409   PetscCall(MatSetUp(C));
410   C->ops->matmultnumeric = MatMatMultNumeric_Elemental_MPIDense;
411   PetscFunctionReturn(PETSC_SUCCESS);
412 }
413 
414 static PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense_AB(Mat C)
415 {
416   PetscFunctionBegin;
417   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental_MPIDense;
418   C->ops->productsymbolic = MatProductSymbolic_AB;
419   PetscFunctionReturn(PETSC_SUCCESS);
420 }
421 
422 static PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense(Mat C)
423 {
424   Mat_Product *product = C->product;
425 
426   PetscFunctionBegin;
427   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_Elemental_MPIDense_AB(C));
428   PetscFunctionReturn(PETSC_SUCCESS);
429 }
430 
/*
  MatGetDiagonal_Elemental - Extract the (possibly rectangular) main diagonal
  into vector D.  Each global diagonal index i is translated to Elemental
  coordinates for both dimensions, fetched with Get() (NOTE(review): Get() on
  a DistMatrix appears to be called for every i on every rank — likely a
  collective/communicating operation; confirm cost for large matrices), and
  inserted into D via VecSetValues() followed by vector assembly.
*/
static PetscErrorCode MatGetDiagonal_Elemental(Mat A, Vec D)
{
  PetscInt        i, nrows, ncols, nD, rrank, ridx, crank, cidx;
  Mat_Elemental  *a = (Mat_Elemental *)A->data;
  PetscElemScalar v;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetSize(A, &nrows, &ncols));
  nD = nrows > ncols ? ncols : nrows; /* diagonal length = min(nrows, ncols) */
  for (i = 0; i < nD; i++) {
    PetscInt erow, ecol;
    P2RO(A, 0, i, &rrank, &ridx);
    RO2E(A, 0, rrank, ridx, &erow);
    PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
    P2RO(A, 1, i, &crank, &cidx);
    RO2E(A, 1, crank, cidx, &ecol);
    PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
    v = a->emat->Get(erow, ecol);
    PetscCall(VecSetValues(D, 1, &i, (PetscScalar *)&v, INSERT_VALUES));
  }
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(PETSC_SUCCESS);
}
457 
/*
  MatDiagonalScale_Elemental - X = diag(L) * X * diag(R).

  Either vector may be NULL to skip that side.  Each vector's array is
  attached zero-copy as an Elemental [VC,STAR] column vector and applied
  with El::DiagonalScale() on the corresponding side.
*/
static PetscErrorCode MatDiagonalScale_Elemental(Mat X, Vec L, Vec R)
{
  Mat_Elemental         *x = (Mat_Elemental *)X->data;
  const PetscElemScalar *d;

  PetscFunctionBegin;
  if (R) {
    /* right scaling: one entry per column of X */
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
    de.LockedAttach(X->cmap->N, 1, *x->grid, 0, 0, d, X->cmap->n);
    El::DiagonalScale(El::RIGHT, El::NORMAL, de, *x->emat);
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    /* left scaling: one entry per row of X */
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
    de.LockedAttach(X->rmap->N, 1, *x->grid, 0, 0, d, X->rmap->n);
    El::DiagonalScale(El::LEFT, El::NORMAL, de, *x->emat);
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
480 
481 static PetscErrorCode MatMissingDiagonal_Elemental(Mat, PetscBool *missing, PetscInt *)
482 {
483   PetscFunctionBegin;
484   *missing = PETSC_FALSE;
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
488 static PetscErrorCode MatScale_Elemental(Mat X, PetscScalar a)
489 {
490   Mat_Elemental *x = (Mat_Elemental *)X->data;
491 
492   PetscFunctionBegin;
493   El::Scale((PetscElemScalar)a, *x->emat);
494   PetscFunctionReturn(PETSC_SUCCESS);
495 }
496 
497 /*
498   MatAXPY - Computes Y = a*X + Y.
499 */
500 static PetscErrorCode MatAXPY_Elemental(Mat Y, PetscScalar a, Mat X, MatStructure)
501 {
502   Mat_Elemental *x = (Mat_Elemental *)X->data;
503   Mat_Elemental *y = (Mat_Elemental *)Y->data;
504 
505   PetscFunctionBegin;
506   El::Axpy((PetscElemScalar)a, *x->emat, *y->emat);
507   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
508   PetscFunctionReturn(PETSC_SUCCESS);
509 }
510 
511 static PetscErrorCode MatCopy_Elemental(Mat A, Mat B, MatStructure)
512 {
513   Mat_Elemental *a = (Mat_Elemental *)A->data;
514   Mat_Elemental *b = (Mat_Elemental *)B->data;
515 
516   PetscFunctionBegin;
517   El::Copy(*a->emat, *b->emat);
518   PetscCall(PetscObjectStateIncrease((PetscObject)B));
519   PetscFunctionReturn(PETSC_SUCCESS);
520 }
521 
/*
  MatDuplicate_Elemental - Create a new MATELEMENTAL matrix with the same
  layout as A; values are copied only when op == MAT_COPY_VALUES.  The
  duplicate is marked assembled regardless (Elemental storage is always
  fully allocated).
*/
static PetscErrorCode MatDuplicate_Elemental(Mat A, MatDuplicateOption op, Mat *B)
{
  Mat            Be;
  MPI_Comm       comm;
  Mat_Elemental *a = (Mat_Elemental *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatCreate(comm, &Be));
  PetscCall(MatSetSizes(Be, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  *B = Be;
  if (op == MAT_COPY_VALUES) {
    Mat_Elemental *b = (Mat_Elemental *)Be->data;
    El::Copy(*a->emat, *b->emat);
  }
  Be->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
542 
/*
  MatTranspose_Elemental - B = A^T (out-of-place only).

  MAT_INITIAL_MATRIX creates B with swapped local sizes; MAT_REUSE_MATRIX
  reuses the caller's B after a nonzero-state sanity check; MAT_INPLACE_MATRIX
  is rejected.  The transpose itself is performed by El::Transpose().
*/
static PetscErrorCode MatTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
{
  Mat            Be = *B;
  MPI_Comm       comm;
  Mat_Elemental *a = (Mat_Elemental *)A->data, *b;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* Only out-of-place supported */
  PetscCheck(reuse != MAT_INPLACE_MATRIX, comm, PETSC_ERR_SUP, "Only out-of-place supported");
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B gets A's column layout as rows and vice versa */
    PetscCall(MatCreate(comm, &Be));
    PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(Be, MATELEMENTAL));
    PetscCall(MatSetUp(Be));
    *B = Be;
  }
  b = (Mat_Elemental *)Be->data;
  El::Transpose(*a->emat, *b->emat);
  Be->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
566 
567 static PetscErrorCode MatConjugate_Elemental(Mat A)
568 {
569   Mat_Elemental *a = (Mat_Elemental *)A->data;
570 
571   PetscFunctionBegin;
572   El::Conjugate(*a->emat);
573   PetscFunctionReturn(PETSC_SUCCESS);
574 }
575 
576 static PetscErrorCode MatHermitianTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
577 {
578   Mat            Be = *B;
579   MPI_Comm       comm;
580   Mat_Elemental *a = (Mat_Elemental *)A->data, *b;
581 
582   PetscFunctionBegin;
583   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
584   /* Only out-of-place supported */
585   if (reuse == MAT_INITIAL_MATRIX) {
586     PetscCall(MatCreate(comm, &Be));
587     PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
588     PetscCall(MatSetType(Be, MATELEMENTAL));
589     PetscCall(MatSetUp(Be));
590     *B = Be;
591   }
592   b = (Mat_Elemental *)Be->data;
593   El::Adjoint(*a->emat, *b->emat);
594   Be->assembled = PETSC_TRUE;
595   PetscFunctionReturn(PETSC_SUCCESS);
596 }
597 
/*
  MatSolve_Elemental - X = A^{-1} B using the factors stored in A.

  B is copied into X, X's array is attached as a [VC,STAR] Elemental vector,
  redistributed into [MC,MR] (the layout the factored matrix uses), solved
  with the factorization matching A->factortype and a->pivoting
  (0: no pivoting, 1: row pivots P, 2: full pivoting P and Q), then copied
  back into the attached view so the PETSc array sees the solution.
*/
static PetscErrorCode MatSolve_Elemental(Mat A, Vec B, Vec X)
{
  Mat_Elemental   *a = (Mat_Elemental *)A->data;
  PetscElemScalar *x;
  PetscInt         pivoting = a->pivoting;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, (PetscScalar **)&x));

  El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe;
  xe.Attach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
  El::DistMatrix<PetscElemScalar, El::MC, El::MR> xer(xe); /* redistribute to the factor's layout */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, xer);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, xer);
    } else { /* pivoting == 2 */
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, xer);
    }
    break;
  case MAT_FACTOR_CHOLESKY:
    El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, xer);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
    break;
  }
  El::Copy(xer, xe); /* copy the solution back into the view over X's array */

  PetscCall(VecRestoreArray(X, (PetscScalar **)&x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
633 
634 static PetscErrorCode MatSolveAdd_Elemental(Mat A, Vec B, Vec Y, Vec X)
635 {
636   PetscFunctionBegin;
637   PetscCall(MatSolve_Elemental(A, B, X));
638   PetscCall(VecAXPY(X, 1, Y));
639   PetscFunctionReturn(PETSC_SUCCESS);
640 }
641 
/*
  MatMatSolve_Elemental - X = A^{-1} B with multiple right-hand sides.

  If X is not MATELEMENTAL, B is converted into a temporary Elemental matrix
  C, the solve is performed in C, and the result is converted back into the
  caller's X (MAT_REUSE_MATRIX).  Otherwise B's entries are copied straight
  into X and solved in place.  Factor selection mirrors MatSolve_Elemental()
  (LU with pivoting level 0/1/2, or Cholesky).
*/
static PetscErrorCode MatMatSolve_Elemental(Mat A, Mat B, Mat X)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  Mat_Elemental *x;
  Mat            C;
  PetscInt       pivoting = a->pivoting;
  PetscBool      flg;
  MatType        type;

  PetscFunctionBegin;
  PetscCall(MatGetType(X, &type));
  PetscCall(PetscStrcmp(type, MATELEMENTAL, &flg));
  if (!flg) {
    /* X is not Elemental: solve in a temporary Elemental copy of B */
    PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &C));
    x = (Mat_Elemental *)C->data;
  } else {
    x = (Mat_Elemental *)X->data;
    El::Copy(*((Mat_Elemental *)B->data)->emat, *x->emat);
  }
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *x->emat);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *x->emat);
    } else {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, *x->emat);
    }
    break;
  case MAT_FACTOR_CHOLESKY:
    El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, *x->emat);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
    break;
  }
  if (!flg) {
    /* move the solution from the temporary back into the caller's X */
    PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
    PetscCall(MatDestroy(&C));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
684 
685 static PetscErrorCode MatLUFactor_Elemental(Mat A, IS, IS, const MatFactorInfo *)
686 {
687   Mat_Elemental *a        = (Mat_Elemental *)A->data;
688   PetscInt       pivoting = a->pivoting;
689 
690   PetscFunctionBegin;
691   if (pivoting == 0) {
692     El::LU(*a->emat);
693   } else if (pivoting == 1) {
694     El::LU(*a->emat, *a->P);
695   } else {
696     El::LU(*a->emat, *a->P, *a->Q);
697   }
698   A->factortype = MAT_FACTOR_LU;
699   A->assembled  = PETSC_TRUE;
700 
701   PetscCall(PetscFree(A->solvertype));
702   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode MatLUFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
707 {
708   PetscFunctionBegin;
709   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
710   PetscCall(MatLUFactor_Elemental(F, nullptr, nullptr, info));
711   PetscFunctionReturn(PETSC_SUCCESS);
712 }
713 
714 static PetscErrorCode MatLUFactorSymbolic_Elemental(Mat, Mat, IS, IS, const MatFactorInfo *)
715 {
716   PetscFunctionBegin;
717   /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
718   PetscFunctionReturn(PETSC_SUCCESS);
719 }
720 
721 static PetscErrorCode MatCholeskyFactor_Elemental(Mat A, IS, const MatFactorInfo *)
722 {
723   Mat_Elemental                                    *a = (Mat_Elemental *)A->data;
724   El::DistMatrix<PetscElemScalar, El::MC, El::STAR> d;
725 
726   PetscFunctionBegin;
727   El::Cholesky(El::UPPER, *a->emat);
728   A->factortype = MAT_FACTOR_CHOLESKY;
729   A->assembled  = PETSC_TRUE;
730 
731   PetscCall(PetscFree(A->solvertype));
732   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
733   PetscFunctionReturn(PETSC_SUCCESS);
734 }
735 
736 static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
737 {
738   PetscFunctionBegin;
739   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
740   PetscCall(MatCholeskyFactor_Elemental(F, nullptr, info));
741   PetscFunctionReturn(PETSC_SUCCESS);
742 }
743 
744 static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat, Mat, IS, const MatFactorInfo *)
745 {
746   PetscFunctionBegin;
747   /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
748   PetscFunctionReturn(PETSC_SUCCESS);
749 }
750 
751 static PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat, MatSolverType *type)
752 {
753   PetscFunctionBegin;
754   *type = MATSOLVERELEMENTAL;
755   PetscFunctionReturn(PETSC_SUCCESS);
756 }
757 
/*
  MatGetFactor_elemental_elemental - Create the factorization matrix F for a
  MATELEMENTAL matrix A.  F is a fully set-up MATELEMENTAL matrix of the same
  layout; trivialsymbolic is set because the symbolic factorization stages
  are no-ops (storage is already allocated here).
*/
static PetscErrorCode MatGetFactor_elemental_elemental(Mat A, MatFactorType ftype, Mat *F)
{
  Mat B;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(B, MATELEMENTAL));
  PetscCall(MatSetUp(B));
  B->factortype      = ftype;
  B->trivialsymbolic = PETSC_TRUE; /* symbolic stage does nothing; see MatLUFactorSymbolic_Elemental() */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_elemental_elemental));
  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}
777 
778 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Elemental(void)
779 {
780   PetscFunctionBegin;
781   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_LU, MatGetFactor_elemental_elemental));
782   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_CHOLESKY, MatGetFactor_elemental_elemental));
783   PetscFunctionReturn(PETSC_SUCCESS);
784 }
785 
786 static PetscErrorCode MatNorm_Elemental(Mat A, NormType type, PetscReal *nrm)
787 {
788   Mat_Elemental *a = (Mat_Elemental *)A->data;
789 
790   PetscFunctionBegin;
791   switch (type) {
792   case NORM_1:
793     *nrm = El::OneNorm(*a->emat);
794     break;
795   case NORM_FROBENIUS:
796     *nrm = El::FrobeniusNorm(*a->emat);
797     break;
798   case NORM_INFINITY:
799     *nrm = El::InfinityNorm(*a->emat);
800     break;
801   default:
802     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
803   }
804   PetscFunctionReturn(PETSC_SUCCESS);
805 }
806 
807 static PetscErrorCode MatZeroEntries_Elemental(Mat A)
808 {
809   Mat_Elemental *a = (Mat_Elemental *)A->data;
810 
811   PetscFunctionBegin;
812   El::Zero(*a->emat);
813   PetscFunctionReturn(PETSC_SUCCESS);
814 }
815 
/*
  MatGetOwnershipIS_Elemental - Build index sets listing the global PETSc row
  (and/or column) indices whose entries are stored on this process.

  Either output may be NULL to skip it.  For each locally stored Elemental
  index (shift + i*stride along that dimension), the Elemental coordinate is
  mapped back to a (rank, offset) pair with E2RO() and then to a PETSc global
  index with RO2P().  The malloc'ed index arrays are handed to the IS with
  PETSC_OWN_POINTER, so the IS takes ownership and frees them.
*/
static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A, IS *rows, IS *cols)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  PetscInt       i, m, shift, stride, *idx;

  PetscFunctionBegin;
  if (rows) {
    m      = a->emat->LocalHeight();
    shift  = a->emat->ColShift();
    stride = a->emat->ColStride();
    PetscCall(PetscMalloc1(m, &idx));
    for (i = 0; i < m; i++) {
      PetscInt rank, offset;
      E2RO(A, 0, shift + i * stride, &rank, &offset);
      RO2P(A, 0, rank, offset, &idx[i]);
    }
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, rows));
  }
  if (cols) {
    m      = a->emat->LocalWidth();
    shift  = a->emat->RowShift();
    stride = a->emat->RowStride();
    PetscCall(PetscMalloc1(m, &idx));
    for (i = 0; i < m; i++) {
      PetscInt rank, offset;
      E2RO(A, 1, shift + i * stride, &rank, &offset);
      RO2P(A, 1, rank, offset, &idx[i]);
    }
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
848 
/* Convert a MATELEMENTAL matrix to MATDENSE. Each rank walks the PETSc indices it
   owns (from MatGetOwnershipIS()), translates them PETSc -> (rank,offset) ->
   Elemental, reads the entry from the local Elemental buffer and inserts it into
   the dense matrix under its PETSc index. Handles MAT_INITIAL_MATRIX,
   MAT_REUSE_MATRIX (reuses *B) and MAT_INPLACE_MATRIX (replaces A's header). */
static PetscErrorCode MatConvert_Elemental_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
{
  Mat             Bmpi;
  Mat_Elemental  *a = (Mat_Elemental *)A->data;
  MPI_Comm        comm;
  IS              isrows, iscols;
  PetscInt        rrank, ridx, crank, cidx, nrows, ncols, i, j, erow, ecol, elrow, elcol;
  const PetscInt *rows, *cols;
  PetscElemScalar v;
  const El::Grid &grid = a->emat->Grid();

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) {
    Bmpi = *B;
  } else {
    PetscCall(MatCreate(comm, &Bmpi));
    PetscCall(MatSetSizes(Bmpi, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));
  }

  /* Get local entries of A */
  PetscCall(MatGetOwnershipIS(A, &isrows, &iscols));
  PetscCall(ISGetLocalSize(isrows, &nrows));
  PetscCall(ISGetIndices(isrows, &rows));
  PetscCall(ISGetLocalSize(iscols, &ncols));
  PetscCall(ISGetIndices(iscols, &cols));

  /* NOTE(review): GetLocal() is only meaningful for entries stored on this rank;
     the ownership IS presumably guarantees every (rows[i],cols[j]) pair is local
     to this rank's grid position — confirm against E2RO/RO2P. */
  if (a->roworiented) {
    for (i = 0; i < nrows; i++) {
      P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
      RO2E(A, 0, rrank, ridx, &erow);
      PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
      for (j = 0; j < ncols; j++) {
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");

        elrow = erow / grid.MCSize(); /* Elemental local row index */
        elcol = ecol / grid.MRSize(); /* Elemental local column index */
        v     = a->emat->GetLocal(elrow, elcol);
        PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
      }
    }
  } else { /* column-oriented: identical translation, with the column loop outermost */
    for (j = 0; j < ncols; j++) {
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);
      PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
      for (i = 0; i < nrows; i++) {
        P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
        RO2E(A, 0, rrank, ridx, &erow);
        PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");

        elrow = erow / grid.MCSize(); /* Elemental local row index */
        elcol = ecol / grid.MRSize(); /* Elemental local column index */
        v     = a->emat->GetLocal(elrow, elcol);
        PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
      }
    }
  }
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else {
    *B = Bmpi;
  }
  PetscCall(ISDestroy(&isrows));
  PetscCall(ISDestroy(&iscols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
923 
924 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
925 {
926   Mat                mat_elemental;
927   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols;
928   const PetscInt    *cols;
929   const PetscScalar *vals;
930 
931   PetscFunctionBegin;
932   if (reuse == MAT_REUSE_MATRIX) {
933     mat_elemental = *newmat;
934     PetscCall(MatZeroEntries(mat_elemental));
935   } else {
936     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
937     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
938     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
939     PetscCall(MatSetUp(mat_elemental));
940   }
941   for (row = 0; row < M; row++) {
942     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
943     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
944     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
945     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
946   }
947   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
948   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
949 
950   if (reuse == MAT_INPLACE_MATRIX) {
951     PetscCall(MatHeaderReplace(A, &mat_elemental));
952   } else {
953     *newmat = mat_elemental;
954   }
955   PetscFunctionReturn(PETSC_SUCCESS);
956 }
957 
958 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
959 {
960   Mat                mat_elemental;
961   PetscInt           row, ncols, rstart = A->rmap->rstart, rend = A->rmap->rend, j;
962   const PetscInt    *cols;
963   const PetscScalar *vals;
964 
965   PetscFunctionBegin;
966   if (reuse == MAT_REUSE_MATRIX) {
967     mat_elemental = *newmat;
968     PetscCall(MatZeroEntries(mat_elemental));
969   } else {
970     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
971     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
972     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
973     PetscCall(MatSetUp(mat_elemental));
974   }
975   for (row = rstart; row < rend; row++) {
976     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
977     for (j = 0; j < ncols; j++) {
978       /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
979       PetscCall(MatSetValues(mat_elemental, 1, &row, 1, &cols[j], &vals[j], ADD_VALUES));
980     }
981     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
982   }
983   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
984   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
985 
986   if (reuse == MAT_INPLACE_MATRIX) {
987     PetscCall(MatHeaderReplace(A, &mat_elemental));
988   } else {
989     *newmat = mat_elemental;
990   }
991   PetscFunctionReturn(PETSC_SUCCESS);
992 }
993 
/* Convert a sequential SBAIJ (symmetric, upper-triangle-stored) matrix to
   MATELEMENTAL. The stored upper triangle of each row is inserted directly, and
   the strictly lower triangle is reconstructed by mirroring each off-diagonal
   entry (conjugated when A is marked Hermitian). Supports MAT_INITIAL_MATRIX,
   MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
{
  Mat                mat_elemental;
  PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(mat_elemental));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
  }
  /* MatGetRow on SBAIJ requires the upper-triangular access mode to be enabled */
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = 0; row < M; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
    PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      PetscScalar v;
      if (cols[j] == row) continue; /* diagonal entries must not be duplicated */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1035 
/* Convert a parallel MPISBAIJ (symmetric, upper-triangle-stored) matrix to
   MATELEMENTAL. Each rank inserts the upper triangle of its owned rows
   ([rstart, rend)) and mirrors each off-diagonal entry into the lower triangle
   (conjugated when A is marked Hermitian); ADD_VALUES routes the mirrored
   entries to whichever rank owns them. Supports MAT_INITIAL_MATRIX,
   MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
{
  Mat                mat_elemental;
  PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(mat_elemental));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
    PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
  }
  /* MatGetRow on SBAIJ requires the upper-triangular access mode to be enabled */
  PetscCall(MatGetRowUpperTriangular(A));
  for (row = rstart; row < rend; row++) {
    PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
    /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
    PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
    for (j = 0; j < ncols; j++) { /* lower triangular part */
      PetscScalar v;
      if (cols[j] == row) continue; /* diagonal entries must not be duplicated */
      v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
      PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
    }
    PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1077 
1078 static PetscErrorCode MatDestroy_Elemental(Mat A)
1079 {
1080   Mat_Elemental      *a = (Mat_Elemental *)A->data;
1081   Mat_Elemental_Grid *commgrid;
1082   PetscBool           flg;
1083   MPI_Comm            icomm;
1084 
1085   PetscFunctionBegin;
1086   delete a->emat;
1087   delete a->P;
1088   delete a->Q;
1089 
1090   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1091   PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, nullptr));
1092   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
1093   if (--commgrid->grid_refct == 0) {
1094     delete commgrid->grid;
1095     PetscCall(PetscFree(commgrid));
1096     PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Elemental_keyval));
1097   }
1098   PetscCall(PetscCommDestroy(&icomm));
1099   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", nullptr));
1100   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", nullptr));
1101   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", nullptr));
1102   PetscCall(PetscFree(A->data));
1103   PetscFunctionReturn(PETSC_SUCCESS);
1104 }
1105 
/* Set up a MATELEMENTAL matrix: finish the layouts, verify the local sizes match
   the equal (PetscSplitOwnership) distribution Elemental's element-cyclic storage
   requires, size and zero the Elemental matrix, and cache the block-distribution
   parameters used for index translation. */
static PetscErrorCode MatSetUp_Elemental(Mat A)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  MPI_Comm       comm;
  PetscMPIInt    rsize, csize;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* Check if local row and column sizes are equally distributed.
     Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
     exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (like would be returned by
     PetscSplitOwnership(comm,&n,&N), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnership(comm, &n, &A->rmap->N));
  PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local row size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->rmap->n);

  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnership(comm, &n, &A->cmap->N));
  PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local column size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->cmap->n);

  /* Allocate the global-size Elemental matrix and start from all zeros */
  a->emat->Resize(A->rmap->N, A->cmap->N);
  El::Zero(*a->emat);

  PetscCallMPI(MPI_Comm_size(A->rmap->comm, &rsize));
  PetscCallMPI(MPI_Comm_size(A->cmap->comm, &csize));
  PetscCheck(csize == rsize, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Cannot use row and column communicators of different sizes");
  /* Cache, per dimension (0 = rows, 1 = cols): mr[] = number of ranks that hold the
     larger local size (the full comm size when N divides evenly), and m[] = that
     larger local size.  NOTE(review): presumably consumed by the P2RO/RO2P/E2RO/RO2E
     index-translation helpers defined elsewhere in this file — confirm there. */
  a->commsize = rsize;
  a->mr[0]    = A->rmap->N % rsize;
  if (!a->mr[0]) a->mr[0] = rsize;
  a->mr[1] = A->cmap->N % csize;
  if (!a->mr[1]) a->mr[1] = csize;
  a->m[0] = A->rmap->N / rsize + (a->mr[0] != rsize);
  a->m[1] = A->cmap->N / csize + (a->mr[1] != csize);
  PetscFunctionReturn(PETSC_SUCCESS);
}
1145 
1146 static PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType)
1147 {
1148   Mat_Elemental *a = (Mat_Elemental *)A->data;
1149 
1150   PetscFunctionBegin;
1151   /* printf("Calling ProcessQueues\n"); */
1152   a->emat->ProcessQueues();
1153   /* printf("Finished ProcessQueues\n"); */
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 static PetscErrorCode MatAssemblyEnd_Elemental(Mat, MatAssemblyType)
1158 {
1159   PetscFunctionBegin;
1160   /* Currently does nothing */
1161   PetscFunctionReturn(PETSC_SUCCESS);
1162 }
1163 
1164 static PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer)
1165 {
1166   Mat      Adense, Ae;
1167   MPI_Comm comm;
1168 
1169   PetscFunctionBegin;
1170   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1171   PetscCall(MatCreate(comm, &Adense));
1172   PetscCall(MatSetType(Adense, MATDENSE));
1173   PetscCall(MatLoad(Adense, viewer));
1174   PetscCall(MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX, &Ae));
1175   PetscCall(MatDestroy(&Adense));
1176   PetscCall(MatHeaderReplace(newMat, &Ae));
1177   PetscFunctionReturn(PETSC_SUCCESS);
1178 }
1179 
/* Function table for MATELEMENTAL.  Entries are strictly positional (the slot
   meanings are fixed by struct _MatOps in petsc/private/matimpl.h); the numeric
   comments mark every fifth slot and nullptr marks an unimplemented operation.
   Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_Elemental,
                                       nullptr,
                                       nullptr,
                                       MatMult_Elemental,
                                       /* 4*/ MatMultAdd_Elemental,
                                       MatMultTranspose_Elemental,
                                       MatMultTransposeAdd_Elemental,
                                       MatSolve_Elemental,
                                       MatSolveAdd_Elemental,
                                       nullptr,
                                       /*10*/ nullptr,
                                       MatLUFactor_Elemental,
                                       MatCholeskyFactor_Elemental,
                                       nullptr,
                                       MatTranspose_Elemental,
                                       /*15*/ MatGetInfo_Elemental,
                                       nullptr,
                                       MatGetDiagonal_Elemental,
                                       MatDiagonalScale_Elemental,
                                       MatNorm_Elemental,
                                       /*20*/ MatAssemblyBegin_Elemental,
                                       MatAssemblyEnd_Elemental,
                                       MatSetOption_Elemental,
                                       MatZeroEntries_Elemental,
                                       /*24*/ nullptr,
                                       MatLUFactorSymbolic_Elemental,
                                       MatLUFactorNumeric_Elemental,
                                       MatCholeskyFactorSymbolic_Elemental,
                                       MatCholeskyFactorNumeric_Elemental,
                                       /*29*/ MatSetUp_Elemental,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*34*/ MatDuplicate_Elemental,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*39*/ MatAXPY_Elemental,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       MatCopy_Elemental,
                                       /*44*/ nullptr,
                                       MatScale_Elemental,
                                       MatShift_Basic,
                                       nullptr,
                                       nullptr,
                                       /*49*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*54*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*59*/ nullptr,
                                       MatDestroy_Elemental,
                                       MatView_Elemental,
                                       nullptr,
                                       nullptr,
                                       /*64*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*69*/ nullptr,
                                       nullptr,
                                       MatConvert_Elemental_Dense,
                                       nullptr,
                                       nullptr,
                                       /*74*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*79*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       MatLoad_Elemental,
                                       /*84*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*89*/ nullptr,
                                       nullptr,
                                       MatMatMultNumeric_Elemental,
                                       nullptr,
                                       nullptr,
                                       /*94*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       MatMatTransposeMultNumeric_Elemental,
                                       nullptr,
                                       /*99*/ MatProductSetFromOptions_Elemental,
                                       nullptr,
                                       nullptr,
                                       MatConjugate_Elemental,
                                       nullptr,
                                       /*104*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*109*/ MatMatSolve_Elemental,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       MatMissingDiagonal_Elemental,
                                       /*114*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*119*/ nullptr,
                                       MatHermitianTranspose_Elemental,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*124*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*129*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*134*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*140*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*145*/ nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       nullptr,
                                       /*150*/ nullptr,
                                       nullptr,
                                       nullptr};
1333 
1334 /*MC
1335    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package
1336 
1337   Use ./configure --download-elemental to install PETSc to use Elemental
1338 
1339    Options Database Keys:
1340 + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1341 . -pc_factor_mat_solver_type elemental - to use this direct solver with the option -pc_type lu
1342 - -mat_elemental_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1343 
1344   Level: beginner
1345 
1346   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1348    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1349    the given rank.
1350 
1351 .seealso: `MATDENSE`, `MATSCALAPACK`, `MatGetOwnershipIS()`
1352 M*/
1353 #if defined(__clang__)
1354   #pragma clang diagnostic push
1355   #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
1356 #endif
/* Create a MATELEMENTAL matrix: install the function table, allocate the
   type-specific data, attach (or reuse) the reference-counted Elemental grid
   cached as an MPI attribute on the communicator, and create the underlying
   El::DistMatrix. */
PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A)
{
  Mat_Elemental      *a;
  PetscBool           flg, flg1;
  Mat_Elemental_Grid *commgrid;
  MPI_Comm            icomm;
  PetscInt            optv1;

  PetscFunctionBegin;
  /* Install the MATELEMENTAL operation table defined above */
  A->ops[0]     = MatOps_Values;
  A->insertmode = NOT_SET_VALUES;

  PetscCall(PetscNew(&a));
  A->data = (void *)a;

  /* Set up the elemental matrix */
  El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
    /* First MATELEMENTAL matrix in this process: create the keyval and register the citation */
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_Elemental_keyval, (void *)0));
    PetscCall(PetscCitationsRegister(ElementalCitation, &ElementalCite));
  }
  PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
  if (!flg) {
    /* No grid cached on this communicator yet: create one (refcounted, freed in MatDestroy_Elemental) */
    PetscCall(PetscNew(&commgrid));

    PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "Elemental Options", "Mat");
    /* displayed default grid sizes (CommSize,1) are set by us arbitrarily until El::Grid() is called */
    PetscCall(PetscOptionsInt("-mat_elemental_grid_height", "Grid Height", "None", El::mpi::Size(cxxcomm), &optv1, &flg1));
    if (flg1) {
      PetscCheck((El::mpi::Size(cxxcomm) % optv1) == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %" PetscInt_FMT, optv1, (PetscInt)El::mpi::Size(cxxcomm));
      commgrid->grid = new El::Grid(cxxcomm, optv1); /* use user-provided grid height */
    } else {
      commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
      /* printf("new commgrid->grid = %p\n",commgrid->grid);  -- memory leak revealed by valgrind? */
    }
    commgrid->grid_refct = 1;
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_Elemental_keyval, (void *)commgrid));

    /* NOTE(review): -mat_elemental_pivoting is only parsed on the first matrix
       created on this communicator (when the grid is created); later matrices
       keep pivoting's PetscNew-zeroed default — confirm this is intended */
    a->pivoting = 1;
    PetscCall(PetscOptionsInt("-mat_elemental_pivoting", "Pivoting", "None", a->pivoting, &a->pivoting, NULL));

    PetscOptionsEnd();
  } else {
    commgrid->grid_refct++;
  }
  PetscCall(PetscCommDestroy(&icomm));
  a->grid        = commgrid->grid;
  a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_Elemental));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", MatProductSetFromOptions_Elemental_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATELEMENTAL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1415 #if defined(__clang__)
1416   #pragma clang diagnostic pop
1417 #endif
1418