xref: /petsc/src/mat/impls/elemental/matelem.cxx (revision bfe80ac4a46d58cb7760074b25f5e81b2f541d8a)
1 #include <petsc/private/petscelemental.h>
2 
3 const char       ElementalCitation[] = "@Article{Elemental2012,\n"
4                                        "  author  = {Jack Poulson and Bryan Marker and Jeff R. Hammond and Nichols A. Romero and Robert {v}an~{d}e~{G}eijn},\n"
5                                        "  title   = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations},\n"
6                                        "  journal = {{ACM} Transactions on Mathematical Software},\n"
7                                        "  volume  = {39},\n"
8                                        "  number  = {2},\n"
9                                        "  year    = {2013}\n"
10                                        "}\n";
11 static PetscBool ElementalCite       = PETSC_FALSE;
12 
13 /*
14     The variable Petsc_Elemental_keyval identifies an MPI attribute that is
15   attached to a communicator; in this case the attribute is a Mat_Elemental_Grid
16 */
17 static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;
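/*
  Descriptive note (summarizing the code below): the keyval is created lazily in MatCreate_Elemental()
  the first time an Elemental matrix is created; the attached Mat_Elemental_Grid holds the El::Grid and a
  reference count (grid_refct), which MatDestroy_Elemental() decrements, deleting the grid and freeing the
  keyval when it reaches zero.
*/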
18 
19 static PetscErrorCode MatView_Elemental(Mat A, PetscViewer viewer)
20 {
21   Mat_Elemental *a = (Mat_Elemental *)A->data;
22   PetscBool      iascii;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
26   if (iascii) {
27     PetscViewerFormat format;
28     PetscCall(PetscViewerGetFormat(viewer, &format));
29     if (format == PETSC_VIEWER_ASCII_INFO) {
30       /* call elemental viewing function */
31       PetscCall(PetscViewerASCIIPrintf(viewer, "Elemental run parameters:\n"));
32       PetscCall(PetscViewerASCIIPrintf(viewer, "  allocated entries=%zu\n", (*a->emat).AllocatedMemory()));
33       PetscCall(PetscViewerASCIIPrintf(viewer, "  grid height=%d, grid width=%d\n", (*a->emat).Grid().Height(), (*a->emat).Grid().Width()));
34       if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
35         /* call elemental viewing function */
36         PetscCall(PetscPrintf(PetscObjectComm((PetscObject)viewer), "test matview_elemental 2\n"));
37       }
38 
39     } else if (format == PETSC_VIEWER_DEFAULT) {
40       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
41       El::Print(*a->emat, "Elemental matrix (cyclic ordering)");
42       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
43       if (A->factortype == MAT_FACTOR_NONE) {
44         Mat Adense;
45         PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
46         PetscCall(MatView(Adense, viewer));
47         PetscCall(MatDestroy(&Adense));
48       }
49     } else SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Format");
50   } else {
51     /* convert to dense format and call MatView() */
52     Mat Adense;
53     PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
54     PetscCall(MatView(Adense, viewer));
55     PetscCall(MatDestroy(&Adense));
56   }
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 static PetscErrorCode MatGetInfo_Elemental(Mat A, MatInfoType flag, MatInfo *info)
61 {
62   Mat_Elemental *a = (Mat_Elemental *)A->data;
63 
64   PetscFunctionBegin;
65   info->block_size = 1.0;
66 
67   if (flag == MAT_LOCAL) {
68     info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
69     info->nz_used      = info->nz_allocated;
70   } else if (flag == MAT_GLOBAL_MAX) {
71     //PetscCallMPI(MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin)));
72     /* see MatGetInfo_MPIAIJ() for getting global info->nz_allocated! */
73     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_MAX not written yet");
74   } else if (flag == MAT_GLOBAL_SUM) {
75     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_SUM not written yet");
76     info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
77     info->nz_used      = info->nz_allocated;           /* assume Elemental does accurate allocation */
78     //PetscCallMPI(MPIU_Allreduce(isend,irecv,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
79     //PetscPrintf(PETSC_COMM_SELF,"    ... [%d] locally allocated %g\n",rank,info->nz_allocated);
80   }
81 
82   info->nz_unneeded       = 0.0;
83   info->assemblies        = A->num_ass;
84   info->mallocs           = 0;
85   info->memory            = 0; /* REVIEW ME */
86   info->fill_ratio_given  = 0; /* determined by Elemental */
87   info->fill_ratio_needed = 0;
88   info->factor_mallocs    = 0;
89   PetscFunctionReturn(PETSC_SUCCESS);
90 }
91 
92 static PetscErrorCode MatSetOption_Elemental(Mat A, MatOption op, PetscBool flg)
93 {
94   Mat_Elemental *a = (Mat_Elemental *)A->data;
95 
96   PetscFunctionBegin;
97   switch (op) {
98   case MAT_ROW_ORIENTED:
99     a->roworiented = flg;
100     break;
101   default:
102     break;
103   }
104   PetscFunctionReturn(PETSC_SUCCESS);
105 }
106 
107 static PetscErrorCode MatSetValues_Elemental(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
108 {
109   Mat_Elemental *a = (Mat_Elemental *)A->data;
110   PetscInt       i, j, rrank, ridx, crank, cidx, erow, ecol, numQueues = 0;
111 
112   PetscFunctionBegin;
113   // TODO: Initialize matrix to all zeros?
114 
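  /*
     Two-pass scheme: the first pass applies locally owned entries immediately with Set()/Update() and
     counts the off-process updates; after Reserve(numQueues), the second pass calls QueueUpdate() for
     the off-process entries.  The queued updates are flushed by ProcessQueues() in
     MatAssemblyBegin_Elemental().
  */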
115   // Count the number of queues from this process
116   if (a->roworiented) {
117     for (i = 0; i < nr; i++) {
118       if (rows[i] < 0) continue;
119       P2RO(A, 0, rows[i], &rrank, &ridx);
120       RO2E(A, 0, rrank, ridx, &erow);
121       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
122       for (j = 0; j < nc; j++) {
123         if (cols[j] < 0) continue;
124         P2RO(A, 1, cols[j], &crank, &cidx);
125         RO2E(A, 1, crank, cidx, &ecol);
126         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
127         if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
128           /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
129           PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
130           ++numQueues;
131           continue;
132         }
133         /* printf("Locally updating (%d,%d)\n",erow,ecol); */
134         switch (imode) {
135         case INSERT_VALUES:
136           a->emat->Set(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
137           break;
138         case ADD_VALUES:
139           a->emat->Update(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
140           break;
141         default:
142           SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
143         }
144       }
145     }
146 
147     /* printf("numQueues=%d\n",numQueues); */
148     a->emat->Reserve(numQueues);
149     for (i = 0; i < nr; i++) {
150       if (rows[i] < 0) continue;
151       P2RO(A, 0, rows[i], &rrank, &ridx);
152       RO2E(A, 0, rrank, ridx, &erow);
153       for (j = 0; j < nc; j++) {
154         if (cols[j] < 0) continue;
155         P2RO(A, 1, cols[j], &crank, &cidx);
156         RO2E(A, 1, crank, cidx, &ecol);
157         if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
158           /* printf("Queueing remote update of (%d,%d)\n",erow,ecol); */
159           a->emat->QueueUpdate(erow, ecol, vals[i * nc + j]);
160         }
161       }
162     }
163   } else { /* column-oriented */
164     for (j = 0; j < nc; j++) {
165       if (cols[j] < 0) continue;
166       P2RO(A, 1, cols[j], &crank, &cidx);
167       RO2E(A, 1, crank, cidx, &ecol);
168       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
169       for (i = 0; i < nr; i++) {
170         if (rows[i] < 0) continue;
171         P2RO(A, 0, rows[i], &rrank, &ridx);
172         RO2E(A, 0, rrank, ridx, &erow);
173         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
174         if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
175           /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
176           PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
177           ++numQueues;
178           continue;
179         }
180         /* printf("Locally updating (%d,%d)\n",erow,ecol); */
181         switch (imode) {
182         case INSERT_VALUES:
183           a->emat->Set(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
184           break;
185         case ADD_VALUES:
186           a->emat->Update(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
187           break;
188         default:
189           SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
190         }
191       }
192     }
193 
194     /* printf("numQueues=%d\n",numQueues); */
195     a->emat->Reserve(numQueues);
196     for (j = 0; j < nc; j++) {
197       if (cols[j] < 0) continue;
198       P2RO(A, 1, cols[j], &crank, &cidx);
199       RO2E(A, 1, crank, cidx, &ecol);
200 
201       for (i = 0; i < nr; i++) {
202         if (rows[i] < 0) continue;
203         P2RO(A, 0, rows[i], &rrank, &ridx);
204         RO2E(A, 0, rrank, ridx, &erow);
205         if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
206           /* printf("Queueing remote update of (%d,%d)\n",erow,ecol); */
207           a->emat->QueueUpdate(erow, ecol, vals[i + j * nr]);
208         }
209       }
210     }
211   }
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
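/*
  The Mult/MultTranspose/MultAdd routines below wrap the local arrays of the PETSc vectors as
  Elemental [VC,STAR] distributed vectors via Attach()/LockedAttach() (no copy is made) and then
  apply the matrix with El::Gemv().
*/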
215 static PetscErrorCode MatMult_Elemental(Mat A, Vec X, Vec Y)
216 {
217   Mat_Elemental         *a = (Mat_Elemental *)A->data;
218   const PetscElemScalar *x;
219   PetscElemScalar       *y;
220   PetscElemScalar        one = 1, zero = 0;
221 
222   PetscFunctionBegin;
223   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
224   PetscCall(VecGetArray(Y, (PetscScalar **)&y));
225   { /* Scoped so that the destructors run before the vector arrays are restored */
226     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
227     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
228     ye.Attach(A->rmap->N, 1, *a->grid, 0, 0, y, A->rmap->n);
229     El::Gemv(El::NORMAL, one, *a->emat, xe, zero, ye);
230   }
231   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
232   PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
233   PetscFunctionReturn(PETSC_SUCCESS);
234 }
235 
236 static PetscErrorCode MatMultTranspose_Elemental(Mat A, Vec X, Vec Y)
237 {
238   Mat_Elemental         *a = (Mat_Elemental *)A->data;
239   const PetscElemScalar *x;
240   PetscElemScalar       *y;
241   PetscElemScalar        one = 1, zero = 0;
242 
243   PetscFunctionBegin;
244   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
245   PetscCall(VecGetArray(Y, (PetscScalar **)&y));
246   { /* Scoped so that the destructors run before the vector arrays are restored */
247     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
248     xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
249     ye.Attach(A->cmap->N, 1, *a->grid, 0, 0, y, A->cmap->n);
250     El::Gemv(El::TRANSPOSE, one, *a->emat, xe, zero, ye);
251   }
252   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
253   PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
254   PetscFunctionReturn(PETSC_SUCCESS);
255 }
256 
257 static PetscErrorCode MatMultAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
258 {
259   Mat_Elemental         *a = (Mat_Elemental *)A->data;
260   const PetscElemScalar *x;
261   PetscElemScalar       *z;
262   PetscElemScalar        one = 1;
263 
264   PetscFunctionBegin;
265   if (Y != Z) PetscCall(VecCopy(Y, Z));
266   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
267   PetscCall(VecGetArray(Z, (PetscScalar **)&z));
268   { /* Scoped so that the destructors run before the vector arrays are restored */
269     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
270     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
271     ze.Attach(A->rmap->N, 1, *a->grid, 0, 0, z, A->rmap->n);
272     El::Gemv(El::NORMAL, one, *a->emat, xe, one, ze);
273   }
274   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
275   PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
280 {
281   Mat_Elemental         *a = (Mat_Elemental *)A->data;
282   const PetscElemScalar *x;
283   PetscElemScalar       *z;
284   PetscElemScalar        one = 1;
285 
286   PetscFunctionBegin;
287   if (Y != Z) PetscCall(VecCopy(Y, Z));
288   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
289   PetscCall(VecGetArray(Z, (PetscScalar **)&z));
290   { /* Scoped so that the destructors run before the vector arrays are restored */
291     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
292     xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
293     ze.Attach(A->cmap->N, 1, *a->grid, 0, 0, z, A->cmap->n);
294     El::Gemv(El::TRANSPOSE, one, *a->emat, xe, one, ze);
295   }
296   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
297   PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 PetscErrorCode MatMatMultNumeric_Elemental(Mat A, Mat B, Mat C)
302 {
303   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
304   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
305   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
306   PetscElemScalar one = 1, zero = 0;
307 
308   PetscFunctionBegin;
309   { /* Scoping so that constructor is called before pointer is returned */
310     El::Gemm(El::NORMAL, El::NORMAL, one, *a->emat, *b->emat, zero, *c->emat);
311   }
312   C->assembled = PETSC_TRUE;
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
316 PetscErrorCode MatMatMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat Ce)
317 {
318   PetscFunctionBegin;
319   PetscCall(MatSetSizes(Ce, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
320   PetscCall(MatSetType(Ce, MATELEMENTAL));
321   PetscCall(MatSetUp(Ce));
322   Ce->ops->matmultnumeric = MatMatMultNumeric_Elemental;
323   PetscFunctionReturn(PETSC_SUCCESS);
324 }
325 
326 static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A, Mat B, Mat C)
327 {
328   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
329   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
330   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
331   PetscElemScalar one = 1, zero = 0;
332 
333   PetscFunctionBegin;
334   { /* Scoping so that constructor is called before pointer is returned */
335     El::Gemm(El::NORMAL, El::TRANSPOSE, one, *a->emat, *b->emat, zero, *c->emat);
336   }
337   C->assembled = PETSC_TRUE;
338   PetscFunctionReturn(PETSC_SUCCESS);
339 }
340 
341 static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat C)
342 {
343   PetscFunctionBegin;
344   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
345   PetscCall(MatSetType(C, MATELEMENTAL));
346   PetscCall(MatSetUp(C));
347   PetscFunctionReturn(PETSC_SUCCESS);
348 }
349 
350 static PetscErrorCode MatProductSetFromOptions_Elemental_AB(Mat C)
351 {
352   PetscFunctionBegin;
353   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental;
354   C->ops->productsymbolic = MatProductSymbolic_AB;
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 static PetscErrorCode MatProductSetFromOptions_Elemental_ABt(Mat C)
359 {
360   PetscFunctionBegin;
361   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_Elemental;
362   C->ops->productsymbolic          = MatProductSymbolic_ABt;
363   PetscFunctionReturn(PETSC_SUCCESS);
364 }
365 
366 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Elemental(Mat C)
367 {
368   Mat_Product *product = C->product;
369 
370   PetscFunctionBegin;
371   switch (product->type) {
372   case MATPRODUCT_AB:
373     PetscCall(MatProductSetFromOptions_Elemental_AB(C));
374     break;
375   case MATPRODUCT_ABt:
376     PetscCall(MatProductSetFromOptions_Elemental_ABt(C));
377     break;
378   default:
379     break;
380   }
381   PetscFunctionReturn(PETSC_SUCCESS);
382 }
383 
384 static PetscErrorCode MatMatMultNumeric_Elemental_MPIDense(Mat A, Mat B, Mat C)
385 {
386   Mat Be, Ce;
387 
388   PetscFunctionBegin;
389   PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &Be));
390   PetscCall(MatMatMult(A, Be, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &Ce));
391   PetscCall(MatConvert(Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
392   PetscCall(MatDestroy(&Be));
393   PetscCall(MatDestroy(&Ce));
394   PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 
397 static PetscErrorCode MatMatMultSymbolic_Elemental_MPIDense(Mat A, Mat B, PetscReal, Mat C)
398 {
399   PetscFunctionBegin;
400   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
401   PetscCall(MatSetType(C, MATMPIDENSE));
402   PetscCall(MatSetUp(C));
403   C->ops->matmultnumeric = MatMatMultNumeric_Elemental_MPIDense;
404   PetscFunctionReturn(PETSC_SUCCESS);
405 }
406 
407 static PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense_AB(Mat C)
408 {
409   PetscFunctionBegin;
410   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental_MPIDense;
411   C->ops->productsymbolic = MatProductSymbolic_AB;
412   PetscFunctionReturn(PETSC_SUCCESS);
413 }
414 
415 static PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense(Mat C)
416 {
417   Mat_Product *product = C->product;
418 
419   PetscFunctionBegin;
420   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_Elemental_MPIDense_AB(C));
421   PetscFunctionReturn(PETSC_SUCCESS);
422 }
423 
424 static PetscErrorCode MatGetDiagonal_Elemental(Mat A, Vec D)
425 {
426   PetscInt        i, nrows, ncols, nD, rrank, ridx, crank, cidx;
427   Mat_Elemental  *a = (Mat_Elemental *)A->data;
428   PetscElemScalar v;
429   MPI_Comm        comm;
430 
431   PetscFunctionBegin;
432   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
433   PetscCall(MatGetSize(A, &nrows, &ncols));
434   nD = nrows > ncols ? ncols : nrows;
435   for (i = 0; i < nD; i++) {
436     PetscInt erow, ecol;
437     P2RO(A, 0, i, &rrank, &ridx);
438     RO2E(A, 0, rrank, ridx, &erow);
439     PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
440     P2RO(A, 1, i, &crank, &cidx);
441     RO2E(A, 1, crank, cidx, &ecol);
442     PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
443     v = a->emat->Get(erow, ecol);
444     PetscCall(VecSetValues(D, 1, &i, (PetscScalar *)&v, INSERT_VALUES));
445   }
446   PetscCall(VecAssemblyBegin(D));
447   PetscCall(VecAssemblyEnd(D));
448   PetscFunctionReturn(PETSC_SUCCESS);
449 }
450 
451 static PetscErrorCode MatDiagonalScale_Elemental(Mat X, Vec L, Vec R)
452 {
453   Mat_Elemental         *x = (Mat_Elemental *)X->data;
454   const PetscElemScalar *d;
455 
456   PetscFunctionBegin;
457   if (R) {
458     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
459     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
460     de.LockedAttach(X->cmap->N, 1, *x->grid, 0, 0, d, X->cmap->n);
461     El::DiagonalScale(El::RIGHT, El::NORMAL, de, *x->emat);
462     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
463   }
464   if (L) {
465     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
466     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
467     de.LockedAttach(X->rmap->N, 1, *x->grid, 0, 0, d, X->rmap->n);
468     El::DiagonalScale(El::LEFT, El::NORMAL, de, *x->emat);
469     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
470   }
471   PetscFunctionReturn(PETSC_SUCCESS);
472 }
473 
474 static PetscErrorCode MatMissingDiagonal_Elemental(Mat, PetscBool *missing, PetscInt *)
475 {
476   PetscFunctionBegin;
477   *missing = PETSC_FALSE;
478   PetscFunctionReturn(PETSC_SUCCESS);
479 }
480 
481 static PetscErrorCode MatScale_Elemental(Mat X, PetscScalar a)
482 {
483   Mat_Elemental *x = (Mat_Elemental *)X->data;
484 
485   PetscFunctionBegin;
486   El::Scale((PetscElemScalar)a, *x->emat);
487   PetscFunctionReturn(PETSC_SUCCESS);
488 }
489 
490 /*
491   MatAXPY - Computes Y = a*X + Y.
492 */
493 static PetscErrorCode MatAXPY_Elemental(Mat Y, PetscScalar a, Mat X, MatStructure)
494 {
495   Mat_Elemental *x = (Mat_Elemental *)X->data;
496   Mat_Elemental *y = (Mat_Elemental *)Y->data;
497 
498   PetscFunctionBegin;
499   El::Axpy((PetscElemScalar)a, *x->emat, *y->emat);
500   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
501   PetscFunctionReturn(PETSC_SUCCESS);
502 }
503 
504 static PetscErrorCode MatCopy_Elemental(Mat A, Mat B, MatStructure)
505 {
506   Mat_Elemental *a = (Mat_Elemental *)A->data;
507   Mat_Elemental *b = (Mat_Elemental *)B->data;
508 
509   PetscFunctionBegin;
510   El::Copy(*a->emat, *b->emat);
511   PetscCall(PetscObjectStateIncrease((PetscObject)B));
512   PetscFunctionReturn(PETSC_SUCCESS);
513 }
514 
515 static PetscErrorCode MatDuplicate_Elemental(Mat A, MatDuplicateOption op, Mat *B)
516 {
517   Mat            Be;
518   MPI_Comm       comm;
519   Mat_Elemental *a = (Mat_Elemental *)A->data;
520 
521   PetscFunctionBegin;
522   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
523   PetscCall(MatCreate(comm, &Be));
524   PetscCall(MatSetSizes(Be, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
525   PetscCall(MatSetType(Be, MATELEMENTAL));
526   PetscCall(MatSetUp(Be));
527   *B = Be;
528   if (op == MAT_COPY_VALUES) {
529     Mat_Elemental *b = (Mat_Elemental *)Be->data;
530     El::Copy(*a->emat, *b->emat);
531   }
532   Be->assembled = PETSC_TRUE;
533   PetscFunctionReturn(PETSC_SUCCESS);
534 }
535 
536 static PetscErrorCode MatTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
537 {
538   Mat            Be = *B;
539   MPI_Comm       comm;
540   Mat_Elemental *a = (Mat_Elemental *)A->data, *b;
541 
542   PetscFunctionBegin;
543   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
544   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
545   /* Only out-of-place supported */
546   PetscCheck(reuse != MAT_INPLACE_MATRIX, comm, PETSC_ERR_SUP, "Only out-of-place supported");
547   if (reuse == MAT_INITIAL_MATRIX) {
548     PetscCall(MatCreate(comm, &Be));
549     PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
550     PetscCall(MatSetType(Be, MATELEMENTAL));
551     PetscCall(MatSetUp(Be));
552     *B = Be;
553   }
554   b = (Mat_Elemental *)Be->data;
555   El::Transpose(*a->emat, *b->emat);
556   Be->assembled = PETSC_TRUE;
557   PetscFunctionReturn(PETSC_SUCCESS);
558 }
559 
560 static PetscErrorCode MatConjugate_Elemental(Mat A)
561 {
562   Mat_Elemental *a = (Mat_Elemental *)A->data;
563 
564   PetscFunctionBegin;
565   El::Conjugate(*a->emat);
566   PetscFunctionReturn(PETSC_SUCCESS);
567 }
568 
569 static PetscErrorCode MatHermitianTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
570 {
571   Mat            Be = *B;
572   MPI_Comm       comm;
573   Mat_Elemental *a = (Mat_Elemental *)A->data, *b;
574 
575   PetscFunctionBegin;
576   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
577   /* Only out-of-place supported */
578   if (reuse == MAT_INITIAL_MATRIX) {
579     PetscCall(MatCreate(comm, &Be));
580     PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
581     PetscCall(MatSetType(Be, MATELEMENTAL));
582     PetscCall(MatSetUp(Be));
583     *B = Be;
584   }
585   b = (Mat_Elemental *)Be->data;
586   El::Adjoint(*a->emat, *b->emat);
587   Be->assembled = PETSC_TRUE;
588   PetscFunctionReturn(PETSC_SUCCESS);
589 }
590 
591 static PetscErrorCode MatSolve_Elemental(Mat A, Vec B, Vec X)
592 {
593   Mat_Elemental   *a = (Mat_Elemental *)A->data;
594   PetscElemScalar *x;
595   PetscInt         pivoting = a->pivoting;
596 
597   PetscFunctionBegin;
598   PetscCall(VecCopy(B, X));
599   PetscCall(VecGetArray(X, (PetscScalar **)&x));
600 
601   El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe;
602   xe.Attach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
603   El::DistMatrix<PetscElemScalar, El::MC, El::MR> xer(xe);
604   switch (A->factortype) {
605   case MAT_FACTOR_LU:
606     if (pivoting == 0) {
607       El::lu::SolveAfter(El::NORMAL, *a->emat, xer);
608     } else if (pivoting == 1) {
609       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, xer);
610     } else { /* pivoting == 2 */
611       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, xer);
612     }
613     break;
614   case MAT_FACTOR_CHOLESKY:
615     El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, xer);
616     break;
617   default:
618     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
619     break;
620   }
621   El::Copy(xer, xe);
622 
623   PetscCall(VecRestoreArray(X, (PetscScalar **)&x));
624   PetscFunctionReturn(PETSC_SUCCESS);
625 }
626 
627 static PetscErrorCode MatSolveAdd_Elemental(Mat A, Vec B, Vec Y, Vec X)
628 {
629   PetscFunctionBegin;
630   PetscCall(MatSolve_Elemental(A, B, X));
631   PetscCall(VecAXPY(X, 1, Y));
632   PetscFunctionReturn(PETSC_SUCCESS);
633 }
634 
635 static PetscErrorCode MatMatSolve_Elemental(Mat A, Mat B, Mat X)
636 {
637   Mat_Elemental *a = (Mat_Elemental *)A->data;
638   Mat_Elemental *x;
639   Mat            C;
640   PetscInt       pivoting = a->pivoting;
641   PetscBool      flg;
642   MatType        type;
643 
644   PetscFunctionBegin;
645   PetscCall(MatGetType(X, &type));
646   PetscCall(PetscStrcmp(type, MATELEMENTAL, &flg));
647   if (!flg) {
648     PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &C));
649     x = (Mat_Elemental *)C->data;
650   } else {
651     x = (Mat_Elemental *)X->data;
652     El::Copy(*((Mat_Elemental *)B->data)->emat, *x->emat);
653   }
654   switch (A->factortype) {
655   case MAT_FACTOR_LU:
656     if (pivoting == 0) {
657       El::lu::SolveAfter(El::NORMAL, *a->emat, *x->emat);
658     } else if (pivoting == 1) {
659       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *x->emat);
660     } else {
661       El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, *x->emat);
662     }
663     break;
664   case MAT_FACTOR_CHOLESKY:
665     El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, *x->emat);
666     break;
667   default:
668     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
669     break;
670   }
671   if (!flg) {
672     PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
673     PetscCall(MatDestroy(&C));
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 static PetscErrorCode MatLUFactor_Elemental(Mat A, IS, IS, const MatFactorInfo *)
679 {
680   Mat_Elemental *a        = (Mat_Elemental *)A->data;
681   PetscInt       pivoting = a->pivoting;
682 
683   PetscFunctionBegin;
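  /* pivoting == 0: LU without pivoting; pivoting == 1: partial (row) pivoting with permutation P;
     pivoting == 2: full pivoting with row and column permutations P and Q
     (default 1, settable via -mat_elemental_pivoting) */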
684   if (pivoting == 0) {
685     El::LU(*a->emat);
686   } else if (pivoting == 1) {
687     El::LU(*a->emat, *a->P);
688   } else {
689     El::LU(*a->emat, *a->P, *a->Q);
690   }
691   A->factortype = MAT_FACTOR_LU;
692   A->assembled  = PETSC_TRUE;
693 
694   PetscCall(PetscFree(A->solvertype));
695   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
696   PetscFunctionReturn(PETSC_SUCCESS);
697 }
698 
699 static PetscErrorCode MatLUFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
700 {
701   PetscFunctionBegin;
702   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
703   PetscCall(MatLUFactor_Elemental(F, nullptr, nullptr, info));
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
707 static PetscErrorCode MatLUFactorSymbolic_Elemental(Mat, Mat, IS, IS, const MatFactorInfo *)
708 {
709   PetscFunctionBegin;
710   /* F is created and allocated by MatGetFactor_elemental_elemental(), skip this routine. */
711   PetscFunctionReturn(PETSC_SUCCESS);
712 }
713 
714 static PetscErrorCode MatCholeskyFactor_Elemental(Mat A, IS, const MatFactorInfo *)
715 {
716   Mat_Elemental                                    *a = (Mat_Elemental *)A->data;
717   El::DistMatrix<PetscElemScalar, El::MC, El::STAR> d;
718 
719   PetscFunctionBegin;
720   El::Cholesky(El::UPPER, *a->emat);
721   A->factortype = MAT_FACTOR_CHOLESKY;
722   A->assembled  = PETSC_TRUE;
723 
724   PetscCall(PetscFree(A->solvertype));
725   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
726   PetscFunctionReturn(PETSC_SUCCESS);
727 }
728 
729 static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
730 {
731   PetscFunctionBegin;
732   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
733   PetscCall(MatCholeskyFactor_Elemental(F, nullptr, info));
734   PetscFunctionReturn(PETSC_SUCCESS);
735 }
736 
737 static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat, Mat, IS, const MatFactorInfo *)
738 {
739   PetscFunctionBegin;
740   /* F is created and allocated by MatGetFactor_elemental_elemental(), skip this routine. */
741   PetscFunctionReturn(PETSC_SUCCESS);
742 }
743 
744 static PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat, MatSolverType *type)
745 {
746   PetscFunctionBegin;
747   *type = MATSOLVERELEMENTAL;
748   PetscFunctionReturn(PETSC_SUCCESS);
749 }
750 
751 static PetscErrorCode MatGetFactor_elemental_elemental(Mat A, MatFactorType ftype, Mat *F)
752 {
753   Mat B;
754 
755   PetscFunctionBegin;
756   /* Create the factorization matrix */
757   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
758   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
759   PetscCall(MatSetType(B, MATELEMENTAL));
760   PetscCall(MatSetUp(B));
761   B->factortype      = ftype;
762   B->trivialsymbolic = PETSC_TRUE;
763   PetscCall(PetscFree(B->solvertype));
764   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &B->solvertype));
765 
766   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_elemental_elemental));
767   *F = B;
768   PetscFunctionReturn(PETSC_SUCCESS);
769 }
770 
771 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_Elemental(void)
772 {
773   PetscFunctionBegin;
774   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_LU, MatGetFactor_elemental_elemental));
775   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_CHOLESKY, MatGetFactor_elemental_elemental));
776   PetscFunctionReturn(PETSC_SUCCESS);
777 }
778 
779 static PetscErrorCode MatNorm_Elemental(Mat A, NormType type, PetscReal *nrm)
780 {
781   Mat_Elemental *a = (Mat_Elemental *)A->data;
782 
783   PetscFunctionBegin;
784   switch (type) {
785   case NORM_1:
786     *nrm = El::OneNorm(*a->emat);
787     break;
788   case NORM_FROBENIUS:
789     *nrm = El::FrobeniusNorm(*a->emat);
790     break;
791   case NORM_INFINITY:
792     *nrm = El::InfinityNorm(*a->emat);
793     break;
794   default:
795     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
796   }
797   PetscFunctionReturn(PETSC_SUCCESS);
798 }
799 
800 static PetscErrorCode MatZeroEntries_Elemental(Mat A)
801 {
802   Mat_Elemental *a = (Mat_Elemental *)A->data;
803 
804   PetscFunctionBegin;
805   El::Zero(*a->emat);
806   PetscFunctionReturn(PETSC_SUCCESS);
807 }
808 
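/*
  Returns the global PETSc indices of the rows and columns whose entries this process stores under
  Elemental's 2D cyclic distribution, by walking the local height/width with ColShift()/ColStride()
  (resp. RowShift()/RowStride()) and translating back through the E2RO/RO2P macros.
*/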
809 static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A, IS *rows, IS *cols)
810 {
811   Mat_Elemental *a = (Mat_Elemental *)A->data;
812   PetscInt       i, m, shift, stride, *idx;
813 
814   PetscFunctionBegin;
815   if (rows) {
816     m      = a->emat->LocalHeight();
817     shift  = a->emat->ColShift();
818     stride = a->emat->ColStride();
819     PetscCall(PetscMalloc1(m, &idx));
820     for (i = 0; i < m; i++) {
821       PetscInt rank, offset;
822       E2RO(A, 0, shift + i * stride, &rank, &offset);
823       RO2P(A, 0, rank, offset, &idx[i]);
824     }
825     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, rows));
826   }
827   if (cols) {
828     m      = a->emat->LocalWidth();
829     shift  = a->emat->RowShift();
830     stride = a->emat->RowStride();
831     PetscCall(PetscMalloc1(m, &idx));
832     for (i = 0; i < m; i++) {
833       PetscInt rank, offset;
834       E2RO(A, 1, shift + i * stride, &rank, &offset);
835       RO2P(A, 1, rank, offset, &idx[i]);
836     }
837     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, cols));
838   }
839   PetscFunctionReturn(PETSC_SUCCESS);
840 }
841 
842 static PetscErrorCode MatConvert_Elemental_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
843 {
844   Mat             Bmpi;
845   Mat_Elemental  *a = (Mat_Elemental *)A->data;
846   MPI_Comm        comm;
847   IS              isrows, iscols;
848   PetscInt        rrank, ridx, crank, cidx, nrows, ncols, i, j, erow, ecol, elrow, elcol;
849   const PetscInt *rows, *cols;
850   PetscElemScalar v;
851   const El::Grid &grid = a->emat->Grid();
852 
853   PetscFunctionBegin;
854   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
855 
856   if (reuse == MAT_REUSE_MATRIX) {
857     Bmpi = *B;
858   } else {
859     PetscCall(MatCreate(comm, &Bmpi));
860     PetscCall(MatSetSizes(Bmpi, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
861     PetscCall(MatSetType(Bmpi, MATDENSE));
862     PetscCall(MatSetUp(Bmpi));
863   }
864 
865   /* Get local entries of A */
866   PetscCall(MatGetOwnershipIS(A, &isrows, &iscols));
867   PetscCall(ISGetLocalSize(isrows, &nrows));
868   PetscCall(ISGetIndices(isrows, &rows));
869   PetscCall(ISGetLocalSize(iscols, &ncols));
870   PetscCall(ISGetIndices(iscols, &cols));
871 
872   if (a->roworiented) {
873     for (i = 0; i < nrows; i++) {
874       P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
875       RO2E(A, 0, rrank, ridx, &erow);
876       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
877       for (j = 0; j < ncols; j++) {
878         P2RO(A, 1, cols[j], &crank, &cidx);
879         RO2E(A, 1, crank, cidx, &ecol);
880         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
881 
882         elrow = erow / grid.MCSize(); /* Elemental local row index */
883         elcol = ecol / grid.MRSize(); /* Elemental local column index */
884         v     = a->emat->GetLocal(elrow, elcol);
885         PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
886       }
887     }
888   } else { /* column-oriented */
889     for (j = 0; j < ncols; j++) {
890       P2RO(A, 1, cols[j], &crank, &cidx);
891       RO2E(A, 1, crank, cidx, &ecol);
892       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
893       for (i = 0; i < nrows; i++) {
894         P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
895         RO2E(A, 0, rrank, ridx, &erow);
896         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
897 
898         elrow = erow / grid.MCSize(); /* Elemental local row index */
899         elcol = ecol / grid.MRSize(); /* Elemental local column index */
900         v     = a->emat->GetLocal(elrow, elcol);
901         PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
902       }
903     }
904   }
905   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
906   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
907   if (reuse == MAT_INPLACE_MATRIX) {
908     PetscCall(MatHeaderReplace(A, &Bmpi));
909   } else {
910     *B = Bmpi;
911   }
912   PetscCall(ISDestroy(&isrows));
913   PetscCall(ISDestroy(&iscols));
914   PetscFunctionReturn(PETSC_SUCCESS);
915 }
916 
917 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
918 {
919   Mat                mat_elemental;
920   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols;
921   const PetscInt    *cols;
922   const PetscScalar *vals;
923 
924   PetscFunctionBegin;
925   if (reuse == MAT_REUSE_MATRIX) {
926     mat_elemental = *newmat;
927     PetscCall(MatZeroEntries(mat_elemental));
928   } else {
929     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
930     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
931     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
932     PetscCall(MatSetUp(mat_elemental));
933   }
934   for (row = 0; row < M; row++) {
935     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
936     /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
937     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
938     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
939   }
940   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
941   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
942 
943   if (reuse == MAT_INPLACE_MATRIX) {
944     PetscCall(MatHeaderReplace(A, &mat_elemental));
945   } else {
946     *newmat = mat_elemental;
947   }
948   PetscFunctionReturn(PETSC_SUCCESS);
949 }
950 
951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
952 {
953   Mat                mat_elemental;
954   PetscInt           row, ncols, rstart = A->rmap->rstart, rend = A->rmap->rend, j;
955   const PetscInt    *cols;
956   const PetscScalar *vals;
957 
958   PetscFunctionBegin;
959   if (reuse == MAT_REUSE_MATRIX) {
960     mat_elemental = *newmat;
961     PetscCall(MatZeroEntries(mat_elemental));
962   } else {
963     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
964     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
965     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
966     PetscCall(MatSetUp(mat_elemental));
967   }
968   for (row = rstart; row < rend; row++) {
969     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
970     for (j = 0; j < ncols; j++) {
971       /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
972       PetscCall(MatSetValues(mat_elemental, 1, &row, 1, &cols[j], &vals[j], ADD_VALUES));
973     }
974     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
975   }
976   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
977   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
978 
979   if (reuse == MAT_INPLACE_MATRIX) {
980     PetscCall(MatHeaderReplace(A, &mat_elemental));
981   } else {
982     *newmat = mat_elemental;
983   }
984   PetscFunctionReturn(PETSC_SUCCESS);
985 }
986 
987 PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
988 {
989   Mat                mat_elemental;
990   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j;
991   const PetscInt    *cols;
992   const PetscScalar *vals;
993 
994   PetscFunctionBegin;
995   if (reuse == MAT_REUSE_MATRIX) {
996     mat_elemental = *newmat;
997     PetscCall(MatZeroEntries(mat_elemental));
998   } else {
999     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1000     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
1001     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1002     PetscCall(MatSetUp(mat_elemental));
1003   }
1004   PetscCall(MatGetRowUpperTriangular(A));
1005   for (row = 0; row < M; row++) {
1006     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1007     /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1008     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
1009     for (j = 0; j < ncols; j++) { /* lower triangular part */
1010       PetscScalar v;
1011       if (cols[j] == row) continue;
1012       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1013       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1014     }
1015     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1016   }
1017   PetscCall(MatRestoreRowUpperTriangular(A));
1018   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1019   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1020 
1021   if (reuse == MAT_INPLACE_MATRIX) {
1022     PetscCall(MatHeaderReplace(A, &mat_elemental));
1023   } else {
1024     *newmat = mat_elemental;
1025   }
1026   PetscFunctionReturn(PETSC_SUCCESS);
1027 }
1028 
1029 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
1030 {
1031   Mat                mat_elemental;
1032   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1033   const PetscInt    *cols;
1034   const PetscScalar *vals;
1035 
1036   PetscFunctionBegin;
1037   if (reuse == MAT_REUSE_MATRIX) {
1038     mat_elemental = *newmat;
1039     PetscCall(MatZeroEntries(mat_elemental));
1040   } else {
1041     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1042     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
1043     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1044     PetscCall(MatSetUp(mat_elemental));
1045   }
1046   PetscCall(MatGetRowUpperTriangular(A));
1047   for (row = rstart; row < rend; row++) {
1048     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1049     /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1050     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
1051     for (j = 0; j < ncols; j++) { /* lower triangular part */
1052       PetscScalar v;
1053       if (cols[j] == row) continue;
1054       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1055       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1056     }
1057     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1058   }
1059   PetscCall(MatRestoreRowUpperTriangular(A));
1060   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1061   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1062 
1063   if (reuse == MAT_INPLACE_MATRIX) {
1064     PetscCall(MatHeaderReplace(A, &mat_elemental));
1065   } else {
1066     *newmat = mat_elemental;
1067   }
1068   PetscFunctionReturn(PETSC_SUCCESS);
1069 }
1070 
1071 static PetscErrorCode MatDestroy_Elemental(Mat A)
1072 {
1073   Mat_Elemental      *a = (Mat_Elemental *)A->data;
1074   Mat_Elemental_Grid *commgrid;
1075   PetscBool           flg;
1076   MPI_Comm            icomm;
1077 
1078   PetscFunctionBegin;
1079   delete a->emat;
1080   delete a->P;
1081   delete a->Q;
1082 
1083   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1084   PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, nullptr));
1085   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
1086   if (--commgrid->grid_refct == 0) {
1087     delete commgrid->grid;
1088     PetscCall(PetscFree(commgrid));
1089     PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Elemental_keyval));
1090   }
1091   PetscCall(PetscCommDestroy(&icomm));
1092   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", nullptr));
1093   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", nullptr));
1094   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", nullptr));
1095   PetscCall(PetscFree(A->data));
1096   PetscFunctionReturn(PETSC_SUCCESS);
1097 }
1098 
1099 static PetscErrorCode MatSetUp_Elemental(Mat A)
1100 {
1101   Mat_Elemental *a = (Mat_Elemental *)A->data;
1102   MPI_Comm       comm;
1103   PetscMPIInt    rsize, csize;
1104   PetscInt       n;
1105 
1106   PetscFunctionBegin;
1107   PetscCall(PetscLayoutSetUp(A->rmap));
1108   PetscCall(PetscLayoutSetUp(A->cmap));
1109 
1110   /* Check that the local row and column sizes are equally distributed.
1111      Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
1112      exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (as would be returned by
1113      PetscSplitOwnership(comm,&n,&N)), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
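  /* For example, with N = 10 rows on 3 ranks PetscSplitOwnership() gives local sizes 4, 3, 3;
     the checks below require the matrix to have been created with exactly that split. */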
1114   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1115   n = PETSC_DECIDE;
1116   PetscCall(PetscSplitOwnership(comm, &n, &A->rmap->N));
1117   PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local row size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->rmap->n);
1118 
1119   n = PETSC_DECIDE;
1120   PetscCall(PetscSplitOwnership(comm, &n, &A->cmap->N));
1121   PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local column size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->cmap->n);
1122 
1123   a->emat->Resize(A->rmap->N, A->cmap->N);
1124   El::Zero(*a->emat);
1125 
1126   PetscCallMPI(MPI_Comm_size(A->rmap->comm, &rsize));
1127   PetscCallMPI(MPI_Comm_size(A->cmap->comm, &csize));
1128   PetscCheck(csize == rsize, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Cannot use row and column communicators of different sizes");
1129   a->commsize = rsize;
1130   a->mr[0]    = A->rmap->N % rsize;
1131   if (!a->mr[0]) a->mr[0] = rsize;
1132   a->mr[1] = A->cmap->N % csize;
1133   if (!a->mr[1]) a->mr[1] = csize;
1134   a->m[0] = A->rmap->N / rsize + (a->mr[0] != rsize);
1135   a->m[1] = A->cmap->N / csize + (a->mr[1] != csize);
1136   PetscFunctionReturn(PETSC_SUCCESS);
1137 }
1138 
1139 static PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType)
1140 {
1141   Mat_Elemental *a = (Mat_Elemental *)A->data;
1142 
1143   PetscFunctionBegin;
1144   /* printf("Calling ProcessQueues\n"); */
1145   a->emat->ProcessQueues();
1146   /* printf("Finished ProcessQueues\n"); */
1147   PetscFunctionReturn(PETSC_SUCCESS);
1148 }
1149 
1150 static PetscErrorCode MatAssemblyEnd_Elemental(Mat, MatAssemblyType)
1151 {
1152   PetscFunctionBegin;
1153   /* Currently does nothing */
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 static PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer)
1158 {
1159   Mat      Adense, Ae;
1160   MPI_Comm comm;
1161 
1162   PetscFunctionBegin;
1163   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1164   PetscCall(MatCreate(comm, &Adense));
1165   PetscCall(MatSetType(Adense, MATDENSE));
1166   PetscCall(MatLoad(Adense, viewer));
1167   PetscCall(MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX, &Ae));
1168   PetscCall(MatDestroy(&Adense));
1169   PetscCall(MatHeaderReplace(newMat, &Ae));
1170   PetscFunctionReturn(PETSC_SUCCESS);
1171 }
1172 
1173 static struct _MatOps MatOps_Values = {MatSetValues_Elemental,
1174                                        nullptr,
1175                                        nullptr,
1176                                        MatMult_Elemental,
1177                                        /* 4*/ MatMultAdd_Elemental,
1178                                        MatMultTranspose_Elemental,
1179                                        MatMultTransposeAdd_Elemental,
1180                                        MatSolve_Elemental,
1181                                        MatSolveAdd_Elemental,
1182                                        nullptr,
1183                                        /*10*/ nullptr,
1184                                        MatLUFactor_Elemental,
1185                                        MatCholeskyFactor_Elemental,
1186                                        nullptr,
1187                                        MatTranspose_Elemental,
1188                                        /*15*/ MatGetInfo_Elemental,
1189                                        nullptr,
1190                                        MatGetDiagonal_Elemental,
1191                                        MatDiagonalScale_Elemental,
1192                                        MatNorm_Elemental,
1193                                        /*20*/ MatAssemblyBegin_Elemental,
1194                                        MatAssemblyEnd_Elemental,
1195                                        MatSetOption_Elemental,
1196                                        MatZeroEntries_Elemental,
1197                                        /*24*/ nullptr,
1198                                        MatLUFactorSymbolic_Elemental,
1199                                        MatLUFactorNumeric_Elemental,
1200                                        MatCholeskyFactorSymbolic_Elemental,
1201                                        MatCholeskyFactorNumeric_Elemental,
1202                                        /*29*/ MatSetUp_Elemental,
1203                                        nullptr,
1204                                        nullptr,
1205                                        nullptr,
1206                                        nullptr,
1207                                        /*34*/ MatDuplicate_Elemental,
1208                                        nullptr,
1209                                        nullptr,
1210                                        nullptr,
1211                                        nullptr,
1212                                        /*39*/ MatAXPY_Elemental,
1213                                        nullptr,
1214                                        nullptr,
1215                                        nullptr,
1216                                        MatCopy_Elemental,
1217                                        /*44*/ nullptr,
1218                                        MatScale_Elemental,
1219                                        MatShift_Basic,
1220                                        nullptr,
1221                                        nullptr,
1222                                        /*49*/ nullptr,
1223                                        nullptr,
1224                                        nullptr,
1225                                        nullptr,
1226                                        nullptr,
1227                                        /*54*/ nullptr,
1228                                        nullptr,
1229                                        nullptr,
1230                                        nullptr,
1231                                        nullptr,
1232                                        /*59*/ nullptr,
1233                                        MatDestroy_Elemental,
1234                                        MatView_Elemental,
1235                                        nullptr,
1236                                        nullptr,
1237                                        /*64*/ nullptr,
1238                                        nullptr,
1239                                        nullptr,
1240                                        nullptr,
1241                                        nullptr,
1242                                        /*69*/ nullptr,
1243                                        nullptr,
1244                                        MatConvert_Elemental_Dense,
1245                                        nullptr,
1246                                        nullptr,
1247                                        /*74*/ nullptr,
1248                                        nullptr,
1249                                        nullptr,
1250                                        nullptr,
1251                                        nullptr,
1252                                        /*79*/ nullptr,
1253                                        nullptr,
1254                                        nullptr,
1255                                        nullptr,
1256                                        MatLoad_Elemental,
1257                                        /*84*/ nullptr,
1258                                        nullptr,
1259                                        nullptr,
1260                                        nullptr,
1261                                        nullptr,
1262                                        /*89*/ nullptr,
1263                                        nullptr,
1264                                        MatMatMultNumeric_Elemental,
1265                                        nullptr,
1266                                        nullptr,
1267                                        /*94*/ nullptr,
1268                                        nullptr,
1269                                        nullptr,
1270                                        MatMatTransposeMultNumeric_Elemental,
1271                                        nullptr,
1272                                        /*99*/ MatProductSetFromOptions_Elemental,
1273                                        nullptr,
1274                                        nullptr,
1275                                        MatConjugate_Elemental,
1276                                        nullptr,
1277                                        /*104*/ nullptr,
1278                                        nullptr,
1279                                        nullptr,
1280                                        nullptr,
1281                                        nullptr,
1282                                        /*109*/ MatMatSolve_Elemental,
1283                                        nullptr,
1284                                        nullptr,
1285                                        nullptr,
1286                                        MatMissingDiagonal_Elemental,
1287                                        /*114*/ nullptr,
1288                                        nullptr,
1289                                        nullptr,
1290                                        nullptr,
1291                                        nullptr,
1292                                        /*119*/ nullptr,
1293                                        MatHermitianTranspose_Elemental,
1294                                        nullptr,
1295                                        nullptr,
1296                                        nullptr,
1297                                        /*124*/ nullptr,
1298                                        nullptr,
1299                                        nullptr,
1300                                        nullptr,
1301                                        nullptr,
1302                                        /*129*/ nullptr,
1303                                        nullptr,
1304                                        nullptr,
1305                                        nullptr,
1306                                        nullptr,
1307                                        /*134*/ nullptr,
1308                                        nullptr,
1309                                        nullptr,
1310                                        nullptr,
1311                                        nullptr,
1312                                        nullptr,
1313                                        /*140*/ nullptr,
1314                                        nullptr,
1315                                        nullptr,
1316                                        nullptr,
1317                                        nullptr,
1318                                        /*145*/ nullptr,
1319                                        nullptr,
1320                                        nullptr,
1321                                        nullptr,
1322                                        nullptr,
1323                                        /*150*/ nullptr,
1324                                        nullptr,
1325                                        nullptr,
1326                                        nullptr,
1327                                        nullptr,
1328                                        /*155*/ nullptr,
1329                                        nullptr};
1330 
1331 /*MC
1332    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package
1333 
1334   Use ./configure --download-elemental to install PETSc to use Elemental
1335 
1336    Options Database Keys:
1337 + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1338 . -pc_factor_mat_solver_type elemental - to use this direct solver with the option -pc_type lu
1339 - -mat_elemental_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1340 
1341   Level: beginner
1342 
1343   Note:
1344    Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1345    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine which entries are stored on
1346    a given rank.
1347 
1348 .seealso: `MATDENSE`, `MATSCALAPACK`, `MatGetOwnershipIS()`
1349 M*/
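/*
  Example usage (a minimal sketch, not taken from a PETSc example; "comm" stands in for the MPI
  communicator and the diagonal values are purely illustrative):

    Mat      A;
    PetscInt i, rstart, rend;
    PetscCall(MatCreate(comm, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 10, 10));
    PetscCall(MatSetType(A, MATELEMENTAL));
    PetscCall(MatSetUp(A));
    PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
    for (i = rstart; i < rend; i++) {
      PetscScalar v = 1.0;
      // only ADD_VALUES is supported for entries stored on another process (see MatSetValues_Elemental())
      PetscCall(MatSetValues(A, 1, &i, 1, &i, &v, ADD_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

    // the locally stored entries follow Elemental's cyclic distribution, not contiguous rows
    IS rows, cols;
    PetscCall(MatGetOwnershipIS(A, &rows, &cols));
    PetscCall(ISDestroy(&rows));
    PetscCall(ISDestroy(&cols));
    PetscCall(MatDestroy(&A));
*/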
1350 #if defined(__clang__)
1351   #pragma clang diagnostic push
1352   #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
1353 #endif
1354 PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A)
1355 {
1356   Mat_Elemental      *a;
1357   PetscBool           flg, flg1;
1358   Mat_Elemental_Grid *commgrid;
1359   MPI_Comm            icomm;
1360   PetscInt            optv1;
1361 
1362   PetscFunctionBegin;
1363   A->ops[0]     = MatOps_Values;
1364   A->insertmode = NOT_SET_VALUES;
1365 
1366   PetscCall(PetscNew(&a));
1367   A->data = (void *)a;
1368 
1369   /* Set up the elemental matrix */
1370   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1371 
1372   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1373   if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
1374     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_Elemental_keyval, nullptr));
1375     PetscCall(PetscCitationsRegister(ElementalCitation, &ElementalCite));
1376   }
1377   PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, NULL));
1378   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
1379   if (!flg) {
1380     PetscCall(PetscNew(&commgrid));
1381 
1382     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "Elemental Options", "Mat");
1383     /* the default grid sizes displayed here, (CommSize,1), are arbitrary placeholders until El::Grid() is called */
1384     PetscCall(PetscOptionsInt("-mat_elemental_grid_height", "Grid Height", "None", El::mpi::Size(cxxcomm), &optv1, &flg1));
1385     if (flg1) {
1386       PetscCheck((El::mpi::Size(cxxcomm) % optv1) == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %" PetscInt_FMT, optv1, (PetscInt)El::mpi::Size(cxxcomm));
1387       commgrid->grid = new El::Grid(cxxcomm, optv1); /* use user-provided grid height */
1388     } else {
1389       commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
1390       /* printf("new commgrid->grid = %p\n",commgrid->grid);  -- memory leak revealed by valgrind? */
1391     }
1392     commgrid->grid_refct = 1;
1393     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_Elemental_keyval, (void *)commgrid));
1394 
1395     a->pivoting = 1;
1396     PetscCall(PetscOptionsInt("-mat_elemental_pivoting", "Pivoting", "None", a->pivoting, &a->pivoting, NULL));
1397 
1398     PetscOptionsEnd();
1399   } else {
1400     commgrid->grid_refct++;
1401   }
1402   PetscCall(PetscCommDestroy(&icomm));
1403   a->grid        = commgrid->grid;
1404   a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
1405   a->roworiented = PETSC_TRUE;
1406 
1407   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_Elemental));
1408   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", MatProductSetFromOptions_Elemental_MPIDense));
1409   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATELEMENTAL));
1410   PetscFunctionReturn(PETSC_SUCCESS);
1411 }
1412 #if defined(__clang__)
1413   #pragma clang diagnostic pop
1414 #endif
1415