xref: /petsc/src/mat/impls/elemental/matelem.cxx (revision cedd07cade5cbdfdad435c8172b7ec8972d9cd8d)
1 #include <petsc/private/petscelemental.h>
2 
3 const char       ElementalCitation[] = "@Article{Elemental2012,\n"
4                                        "  author  = {Jack Poulson and Bryan Marker and Jeff R. Hammond and Nichols A. Romero and Robert {v}an~{d}e~{G}eijn},\n"
5                                        "  title   = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations},\n"
6                                        "  journal = {{ACM} Transactions on Mathematical Software},\n"
7                                        "  volume  = {39},\n"
8                                        "  number  = {2},\n"
9                                        "  year    = {2013}\n"
10                                        "}\n";
11 static PetscBool ElementalCite       = PETSC_FALSE;
12 
13 /*
14     The variable Petsc_Elemental_keyval is used to indicate an MPI attribute that
15   is attached to a communicator, in this case the attribute is a Mat_Elemental_Grid
16 */
17 static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;
18 
19 static PetscErrorCode MatView_Elemental(Mat A, PetscViewer viewer)
20 {
21   Mat_Elemental *a = (Mat_Elemental *)A->data;
22   PetscBool      iascii;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
26   if (iascii) {
27     PetscViewerFormat format;
28     PetscCall(PetscViewerGetFormat(viewer, &format));
29     if (format == PETSC_VIEWER_ASCII_INFO) {
30       /* call elemental viewing function */
31       PetscCall(PetscViewerASCIIPrintf(viewer, "Elemental run parameters:\n"));
32       PetscCall(PetscViewerASCIIPrintf(viewer, "  allocated entries=%zu\n", (*a->emat).AllocatedMemory()));
33       PetscCall(PetscViewerASCIIPrintf(viewer, "  grid height=%d, grid width=%d\n", (*a->emat).Grid().Height(), (*a->emat).Grid().Width()));
34       if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
35         /* call elemental viewing function */
36         PetscCall(PetscPrintf(PetscObjectComm((PetscObject)viewer), "test matview_elemental 2\n"));
37       }
38 
39     } else if (format == PETSC_VIEWER_DEFAULT) {
40       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
41       El::Print(*a->emat, "Elemental matrix (cyclic ordering)");
42       PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
43       if (A->factortype == MAT_FACTOR_NONE) {
44         Mat Adense;
45         PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
46         PetscCall(MatView(Adense, viewer));
47         PetscCall(MatDestroy(&Adense));
48       }
49     } else SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Format");
50   } else {
51     /* convert to dense format and call MatView() */
52     Mat Adense;
53     PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
54     PetscCall(MatView(Adense, viewer));
55     PetscCall(MatDestroy(&Adense));
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 static PetscErrorCode MatGetInfo_Elemental(Mat A, MatInfoType flag, MatInfo *info)
61 {
62   Mat_Elemental *a = (Mat_Elemental *)A->data;
63 
64   PetscFunctionBegin;
65   info->block_size = 1.0;
66 
67   if (flag == MAT_LOCAL) {
68     info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
69     info->nz_used      = info->nz_allocated;
70   } else if (flag == MAT_GLOBAL_MAX) {
71     //PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin)));
72     /* see MatGetInfo_MPIAIJ() for getting global info->nz_allocated! */
73     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_MAX not written yet");
74   } else if (flag == MAT_GLOBAL_SUM) {
75     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_SUM not written yet");
76     info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
77     info->nz_used      = info->nz_allocated;           /* assume Elemental does accurate allocation */
78     //PetscCall(MPIU_Allreduce(isend,irecv,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
79     //PetscPrintf(PETSC_COMM_SELF,"    ... [%d] locally allocated %g\n",rank,info->nz_allocated);
80   }
81 
82   info->nz_unneeded       = 0.0;
83   info->assemblies        = A->num_ass;
84   info->mallocs           = 0;
85   info->memory            = 0; /* REVIEW ME */
86   info->fill_ratio_given  = 0; /* determined by Elemental */
87   info->fill_ratio_needed = 0;
88   info->factor_mallocs    = 0;
89   PetscFunctionReturn(0);
90 }
91 
92 PetscErrorCode MatSetOption_Elemental(Mat A, MatOption op, PetscBool flg)
93 {
94   Mat_Elemental *a = (Mat_Elemental *)A->data;
95 
96   PetscFunctionBegin;
97   switch (op) {
98   case MAT_NEW_NONZERO_LOCATIONS:
99   case MAT_NEW_NONZERO_LOCATION_ERR:
100   case MAT_NEW_NONZERO_ALLOCATION_ERR:
101   case MAT_SYMMETRIC:
102   case MAT_SORTED_FULL:
103   case MAT_HERMITIAN:
104     break;
105   case MAT_ROW_ORIENTED:
106     a->roworiented = flg;
107     break;
108   default:
109     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
110   }
111   PetscFunctionReturn(0);
112 }
113 
/*
   MatSetValues_Elemental - Insert (INSERT_VALUES) or accumulate (ADD_VALUES) an
   nr-by-nc logically dense block of values.

   Locally owned entries are written immediately with Set()/Update(). Off-process
   entries are supported only with ADD_VALUES: a first pass counts them, Reserve()
   allocates queue space, and a second pass queues them with QueueUpdate()
   (presumably flushed during matrix assembly -- not visible in this file).

   a->roworiented selects the layout of vals: row-major (vals[i*nc+j]) or
   column-major (vals[i+j*nr]). Negative row/column indices are skipped.
   Index translation: PETSc global -> (rank, offset) via P2RO(), then -> Elemental
   global index via RO2E().
*/
static PetscErrorCode MatSetValues_Elemental(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  PetscInt       i, j, rrank, ridx, crank, cidx, erow, ecol, numQueues = 0;

  PetscFunctionBegin;
  // TODO: Initialize matrix to all zeros?

  // Count the number of queues from this process
  if (a->roworiented) {
    for (i = 0; i < nr; i++) {
      if (rows[i] < 0) continue;
      P2RO(A, 0, rows[i], &rrank, &ridx);
      RO2E(A, 0, rrank, ridx, &erow);
      PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
      for (j = 0; j < nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
        if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES:
          a->emat->Set(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
          break;
        case ADD_VALUES:
          a->emat->Update(erow, ecol, (PetscElemScalar)vals[i * nc + j]);
          break;
        default:
          SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
        }
      }
    }

    /* Second pass: queue the off-process ADD_VALUES entries counted above */
    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve(numQueues);
    for (i = 0; i < nr; i++) {
      if (rows[i] < 0) continue;
      P2RO(A, 0, rows[i], &rrank, &ridx);
      RO2E(A, 0, rrank, ridx, &erow);
      for (j = 0; j < nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate(erow, ecol, vals[i * nc + j]);
        }
      }
    }
  } else { /* columnoriented: same logic, vals indexed as vals[i + j*nr] */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);
      PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
      for (i = 0; i < nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A, 0, rows[i], &rrank, &ridx);
        RO2E(A, 0, rrank, ridx, &erow);
        PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
        if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES:
          a->emat->Set(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
          break;
        case ADD_VALUES:
          a->emat->Update(erow, ecol, (PetscElemScalar)vals[i + j * nr]);
          break;
        default:
          SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
        }
      }
    }

    /* Second pass: queue the off-process ADD_VALUES entries counted above */
    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve(numQueues);
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);

      for (i = 0; i < nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A, 0, rows[i], &rrank, &ridx);
        RO2E(A, 0, rrank, ridx, &erow);
        if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate(erow, ecol, vals[i + j * nr]);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
221 
222 static PetscErrorCode MatMult_Elemental(Mat A, Vec X, Vec Y)
223 {
224   Mat_Elemental         *a = (Mat_Elemental *)A->data;
225   const PetscElemScalar *x;
226   PetscElemScalar       *y;
227   PetscElemScalar        one = 1, zero = 0;
228 
229   PetscFunctionBegin;
230   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
231   PetscCall(VecGetArray(Y, (PetscScalar **)&y));
232   { /* Scoping so that constructor is called before pointer is returned */
233     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
234     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
235     ye.Attach(A->rmap->N, 1, *a->grid, 0, 0, y, A->rmap->n);
236     El::Gemv(El::NORMAL, one, *a->emat, xe, zero, ye);
237   }
238   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
239   PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
240   PetscFunctionReturn(0);
241 }
242 
243 static PetscErrorCode MatMultTranspose_Elemental(Mat A, Vec X, Vec Y)
244 {
245   Mat_Elemental         *a = (Mat_Elemental *)A->data;
246   const PetscElemScalar *x;
247   PetscElemScalar       *y;
248   PetscElemScalar        one = 1, zero = 0;
249 
250   PetscFunctionBegin;
251   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
252   PetscCall(VecGetArray(Y, (PetscScalar **)&y));
253   { /* Scoping so that constructor is called before pointer is returned */
254     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
255     xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
256     ye.Attach(A->cmap->N, 1, *a->grid, 0, 0, y, A->cmap->n);
257     El::Gemv(El::TRANSPOSE, one, *a->emat, xe, zero, ye);
258   }
259   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
260   PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
261   PetscFunctionReturn(0);
262 }
263 
264 static PetscErrorCode MatMultAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
265 {
266   Mat_Elemental         *a = (Mat_Elemental *)A->data;
267   const PetscElemScalar *x;
268   PetscElemScalar       *z;
269   PetscElemScalar        one = 1;
270 
271   PetscFunctionBegin;
272   if (Y != Z) PetscCall(VecCopy(Y, Z));
273   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
274   PetscCall(VecGetArray(Z, (PetscScalar **)&z));
275   { /* Scoping so that constructor is called before pointer is returned */
276     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
277     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
278     ze.Attach(A->rmap->N, 1, *a->grid, 0, 0, z, A->rmap->n);
279     El::Gemv(El::NORMAL, one, *a->emat, xe, one, ze);
280   }
281   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
282   PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
283   PetscFunctionReturn(0);
284 }
285 
286 static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z)
287 {
288   Mat_Elemental         *a = (Mat_Elemental *)A->data;
289   const PetscElemScalar *x;
290   PetscElemScalar       *z;
291   PetscElemScalar        one = 1;
292 
293   PetscFunctionBegin;
294   if (Y != Z) PetscCall(VecCopy(Y, Z));
295   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
296   PetscCall(VecGetArray(Z, (PetscScalar **)&z));
297   { /* Scoping so that constructor is called before pointer is returned */
298     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
299     xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
300     ze.Attach(A->cmap->N, 1, *a->grid, 0, 0, z, A->cmap->n);
301     El::Gemv(El::TRANSPOSE, one, *a->emat, xe, one, ze);
302   }
303   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
304   PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode MatMatMultNumeric_Elemental(Mat A, Mat B, Mat C)
309 {
310   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
311   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
312   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
313   PetscElemScalar one = 1, zero = 0;
314 
315   PetscFunctionBegin;
316   { /* Scoping so that constructor is called before pointer is returned */
317     El::Gemm(El::NORMAL, El::NORMAL, one, *a->emat, *b->emat, zero, *c->emat);
318   }
319   C->assembled = PETSC_TRUE;
320   PetscFunctionReturn(0);
321 }
322 
323 PetscErrorCode MatMatMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat Ce)
324 {
325   PetscFunctionBegin;
326   PetscCall(MatSetSizes(Ce, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
327   PetscCall(MatSetType(Ce, MATELEMENTAL));
328   PetscCall(MatSetUp(Ce));
329   Ce->ops->matmultnumeric = MatMatMultNumeric_Elemental;
330   PetscFunctionReturn(0);
331 }
332 
333 static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A, Mat B, Mat C)
334 {
335   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
336   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
337   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
338   PetscElemScalar one = 1, zero = 0;
339 
340   PetscFunctionBegin;
341   { /* Scoping so that constructor is called before pointer is returned */
342     El::Gemm(El::NORMAL, El::TRANSPOSE, one, *a->emat, *b->emat, zero, *c->emat);
343   }
344   C->assembled = PETSC_TRUE;
345   PetscFunctionReturn(0);
346 }
347 
348 static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A, Mat B, PetscReal, Mat C)
349 {
350   PetscFunctionBegin;
351   PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
352   PetscCall(MatSetType(C, MATELEMENTAL));
353   PetscCall(MatSetUp(C));
354   PetscFunctionReturn(0);
355 }
356 
357 /* --------------------------------------- */
358 static PetscErrorCode MatProductSetFromOptions_Elemental_AB(Mat C)
359 {
360   PetscFunctionBegin;
361   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental;
362   C->ops->productsymbolic = MatProductSymbolic_AB;
363   PetscFunctionReturn(0);
364 }
365 
366 static PetscErrorCode MatProductSetFromOptions_Elemental_ABt(Mat C)
367 {
368   PetscFunctionBegin;
369   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_Elemental;
370   C->ops->productsymbolic          = MatProductSymbolic_ABt;
371   PetscFunctionReturn(0);
372 }
373 
374 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Elemental(Mat C)
375 {
376   Mat_Product *product = C->product;
377 
378   PetscFunctionBegin;
379   switch (product->type) {
380   case MATPRODUCT_AB:
381     PetscCall(MatProductSetFromOptions_Elemental_AB(C));
382     break;
383   case MATPRODUCT_ABt:
384     PetscCall(MatProductSetFromOptions_Elemental_ABt(C));
385     break;
386   default:
387     break;
388   }
389   PetscFunctionReturn(0);
390 }
391 
392 PetscErrorCode MatMatMultNumeric_Elemental_MPIDense(Mat A, Mat B, Mat C)
393 {
394   Mat Be, Ce;
395 
396   PetscFunctionBegin;
397   PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &Be));
398   PetscCall(MatMatMult(A, Be, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ce));
399   PetscCall(MatConvert(Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
400   PetscCall(MatDestroy(&Be));
401   PetscCall(MatDestroy(&Ce));
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatMatMultSymbolic_Elemental_MPIDense(Mat A, Mat B, PetscReal, Mat C)
406 {
407   PetscFunctionBegin;
408   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
409   PetscCall(MatSetType(C, MATMPIDENSE));
410   PetscCall(MatSetUp(C));
411   C->ops->matmultnumeric = MatMatMultNumeric_Elemental_MPIDense;
412   PetscFunctionReturn(0);
413 }
414 
415 PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense_AB(Mat C)
416 {
417   PetscFunctionBegin;
418   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental_MPIDense;
419   C->ops->productsymbolic = MatProductSymbolic_AB;
420   PetscFunctionReturn(0);
421 }
422 
423 PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense(Mat C)
424 {
425   Mat_Product *product = C->product;
426 
427   PetscFunctionBegin;
428   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_Elemental_MPIDense_AB(C));
429   PetscFunctionReturn(0);
430 }
431 /* --------------------------------------- */
432 
/*
   MatGetDiagonal_Elemental - Extracts the main diagonal (first min(m,n) entries) into D.

   Every rank iterates over ALL nD global diagonal entries, translating each PETSc
   index to Elemental coordinates and reading it with a->emat->Get()
   (NOTE(review): Get() on a DistMatrix appears to fetch possibly remote entries --
   confirm its communication semantics before optimizing this loop).
*/
static PetscErrorCode MatGetDiagonal_Elemental(Mat A, Vec D)
{
  PetscInt        i, nrows, ncols, nD, rrank, ridx, crank, cidx;
  Mat_Elemental  *a = (Mat_Elemental *)A->data;
  PetscElemScalar v;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetSize(A, &nrows, &ncols));
  nD = nrows > ncols ? ncols : nrows; /* length of the main diagonal */
  for (i = 0; i < nD; i++) {
    PetscInt erow, ecol;
    /* translate PETSc index i -> (rank, offset) -> Elemental row/col indices */
    P2RO(A, 0, i, &rrank, &ridx);
    RO2E(A, 0, rrank, ridx, &erow);
    PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
    P2RO(A, 1, i, &crank, &cidx);
    RO2E(A, 1, crank, cidx, &ecol);
    PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
    v = a->emat->Get(erow, ecol);
    PetscCall(VecSetValues(D, 1, &i, (PetscScalar *)&v, INSERT_VALUES));
  }
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
459 
460 static PetscErrorCode MatDiagonalScale_Elemental(Mat X, Vec L, Vec R)
461 {
462   Mat_Elemental         *x = (Mat_Elemental *)X->data;
463   const PetscElemScalar *d;
464 
465   PetscFunctionBegin;
466   if (R) {
467     PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
468     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
469     de.LockedAttach(X->cmap->N, 1, *x->grid, 0, 0, d, X->cmap->n);
470     El::DiagonalScale(El::RIGHT, El::NORMAL, de, *x->emat);
471     PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
472   }
473   if (L) {
474     PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
475     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
476     de.LockedAttach(X->rmap->N, 1, *x->grid, 0, 0, d, X->rmap->n);
477     El::DiagonalScale(El::LEFT, El::NORMAL, de, *x->emat);
478     PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
479   }
480   PetscFunctionReturn(0);
481 }
482 
/* MatMissingDiagonal_Elemental - Dense storage: every diagonal entry exists
   (possibly with value zero), so none is ever reported missing. */
static PetscErrorCode MatMissingDiagonal_Elemental(Mat, PetscBool *missing, PetscInt *)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
489 
490 static PetscErrorCode MatScale_Elemental(Mat X, PetscScalar a)
491 {
492   Mat_Elemental *x = (Mat_Elemental *)X->data;
493 
494   PetscFunctionBegin;
495   El::Scale((PetscElemScalar)a, *x->emat);
496   PetscFunctionReturn(0);
497 }
498 
499 /*
500   MatAXPY - Computes Y = a*X + Y.
501 */
502 static PetscErrorCode MatAXPY_Elemental(Mat Y, PetscScalar a, Mat X, MatStructure)
503 {
504   Mat_Elemental *x = (Mat_Elemental *)X->data;
505   Mat_Elemental *y = (Mat_Elemental *)Y->data;
506 
507   PetscFunctionBegin;
508   El::Axpy((PetscElemScalar)a, *x->emat, *y->emat);
509   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
510   PetscFunctionReturn(0);
511 }
512 
513 static PetscErrorCode MatCopy_Elemental(Mat A, Mat B, MatStructure)
514 {
515   Mat_Elemental *a = (Mat_Elemental *)A->data;
516   Mat_Elemental *b = (Mat_Elemental *)B->data;
517 
518   PetscFunctionBegin;
519   El::Copy(*a->emat, *b->emat);
520   PetscCall(PetscObjectStateIncrease((PetscObject)B));
521   PetscFunctionReturn(0);
522 }
523 
524 static PetscErrorCode MatDuplicate_Elemental(Mat A, MatDuplicateOption op, Mat *B)
525 {
526   Mat            Be;
527   MPI_Comm       comm;
528   Mat_Elemental *a = (Mat_Elemental *)A->data;
529 
530   PetscFunctionBegin;
531   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
532   PetscCall(MatCreate(comm, &Be));
533   PetscCall(MatSetSizes(Be, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
534   PetscCall(MatSetType(Be, MATELEMENTAL));
535   PetscCall(MatSetUp(Be));
536   *B = Be;
537   if (op == MAT_COPY_VALUES) {
538     Mat_Elemental *b = (Mat_Elemental *)Be->data;
539     El::Copy(*a->emat, *b->emat);
540   }
541   Be->assembled = PETSC_TRUE;
542   PetscFunctionReturn(0);
543 }
544 
/*
   MatTranspose_Elemental - Computes B = A^T out-of-place.

   MAT_INITIAL_MATRIX creates B; MAT_REUSE_MATRIX reuses an existing B (after a
   nonzero-state consistency check); MAT_INPLACE_MATRIX is rejected.
*/
static PetscErrorCode MatTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
{
  Mat            Be = *B;
  MPI_Comm       comm;
  Mat_Elemental *a = (Mat_Elemental *)A->data, *b;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* Only out-of-place supported */
  PetscCheck(reuse != MAT_INPLACE_MATRIX, comm, PETSC_ERR_SUP, "Only out-of-place supported");
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B gets A's sizes with rows and columns swapped */
    PetscCall(MatCreate(comm, &Be));
    PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(Be, MATELEMENTAL));
    PetscCall(MatSetUp(Be));
    *B = Be;
  }
  b = (Mat_Elemental *)Be->data;
  El::Transpose(*a->emat, *b->emat);
  Be->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
568 
569 static PetscErrorCode MatConjugate_Elemental(Mat A)
570 {
571   Mat_Elemental *a = (Mat_Elemental *)A->data;
572 
573   PetscFunctionBegin;
574   El::Conjugate(*a->emat);
575   PetscFunctionReturn(0);
576 }
577 
578 static PetscErrorCode MatHermitianTranspose_Elemental(Mat A, MatReuse reuse, Mat *B)
579 {
580   Mat            Be = *B;
581   MPI_Comm       comm;
582   Mat_Elemental *a = (Mat_Elemental *)A->data, *b;
583 
584   PetscFunctionBegin;
585   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
586   /* Only out-of-place supported */
587   if (reuse == MAT_INITIAL_MATRIX) {
588     PetscCall(MatCreate(comm, &Be));
589     PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
590     PetscCall(MatSetType(Be, MATELEMENTAL));
591     PetscCall(MatSetUp(Be));
592     *B = Be;
593   }
594   b = (Mat_Elemental *)Be->data;
595   El::Adjoint(*a->emat, *b->emat);
596   Be->assembled = PETSC_TRUE;
597   PetscFunctionReturn(0);
598 }
599 
/*
   MatSolve_Elemental - Solves A*X = B using the factorization stored in A.

   The right-hand side is copied into X, viewed as an Elemental [VC,STAR] vector,
   redistributed to [MC,MR] for the solve, and copied back. The LU path selects the
   SolveAfter overload matching the pivoting level recorded at factorization time
   (0 = none, 1 = row pivots P, 2 = full pivots P,Q).
*/
static PetscErrorCode MatSolve_Elemental(Mat A, Vec B, Vec X)
{
  Mat_Elemental   *a = (Mat_Elemental *)A->data;
  PetscElemScalar *x;
  PetscInt         pivoting = a->pivoting;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, (PetscScalar **)&x));

  /* zero-copy view of X, then a redistributed [MC,MR] working copy for the solve */
  El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe;
  xe.Attach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
  El::DistMatrix<PetscElemScalar, El::MC, El::MR> xer(xe);
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, xer);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, xer);
    } else { /* pivoting == 2 */
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, xer);
    }
    break;
  case MAT_FACTOR_CHOLESKY:
    El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, xer);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
    break;
  }
  /* copy the solution back into the attached view (and hence into X) */
  El::Copy(xer, xe);

  PetscCall(VecRestoreArray(X, (PetscScalar **)&x));
  PetscFunctionReturn(0);
}
635 
636 static PetscErrorCode MatSolveAdd_Elemental(Mat A, Vec B, Vec Y, Vec X)
637 {
638   PetscFunctionBegin;
639   PetscCall(MatSolve_Elemental(A, B, X));
640   PetscCall(VecAXPY(X, 1, Y));
641   PetscFunctionReturn(0);
642 }
643 
/*
   MatMatSolve_Elemental - Solves A*X = B for many right-hand sides at once.

   If X is not of type MATELEMENTAL, B is converted into a temporary Elemental
   matrix C, the solve runs in-place on C, and C is converted back into X.
   Otherwise B's values are copied into X and the solve runs in-place on X.
   The factor-type/pivoting dispatch mirrors MatSolve_Elemental().
*/
static PetscErrorCode MatMatSolve_Elemental(Mat A, Mat B, Mat X)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  Mat_Elemental *x;
  Mat            C;
  PetscInt       pivoting = a->pivoting;
  PetscBool      flg;
  MatType        type;

  PetscFunctionBegin;
  PetscCall(MatGetType(X, &type));
  PetscCall(PetscStrcmp(type, MATELEMENTAL, &flg));
  if (!flg) {
    /* X is not Elemental: solve in a temporary Elemental copy of B */
    PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &C));
    x = (Mat_Elemental *)C->data;
  } else {
    /* X is Elemental: copy B into X and solve in place */
    x = (Mat_Elemental *)X->data;
    El::Copy(*((Mat_Elemental *)B->data)->emat, *x->emat);
  }
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    /* pivoting level recorded at factorization: 0 = none, 1 = P, 2 = P,Q */
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *x->emat);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *x->emat);
    } else {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, *x->emat);
    }
    break;
  case MAT_FACTOR_CHOLESKY:
    El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, *x->emat);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
    break;
  }
  if (!flg) {
    /* move the solution from the temporary back into X (original type preserved) */
    PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
    PetscCall(MatDestroy(&C));
  }
  PetscFunctionReturn(0);
}
686 
687 static PetscErrorCode MatLUFactor_Elemental(Mat A, IS, IS, const MatFactorInfo *)
688 {
689   Mat_Elemental *a        = (Mat_Elemental *)A->data;
690   PetscInt       pivoting = a->pivoting;
691 
692   PetscFunctionBegin;
693   if (pivoting == 0) {
694     El::LU(*a->emat);
695   } else if (pivoting == 1) {
696     El::LU(*a->emat, *a->P);
697   } else {
698     El::LU(*a->emat, *a->P, *a->Q);
699   }
700   A->factortype = MAT_FACTOR_LU;
701   A->assembled  = PETSC_TRUE;
702 
703   PetscCall(PetscFree(A->solvertype));
704   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
705   PetscFunctionReturn(0);
706 }
707 
708 static PetscErrorCode MatLUFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
709 {
710   PetscFunctionBegin;
711   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
712   PetscCall(MatLUFactor_Elemental(F, 0, 0, info));
713   PetscFunctionReturn(0);
714 }
715 
/* MatLUFactorSymbolic_Elemental - Intentional no-op: dense storage needs no symbolic
   phase; F is created and allocated by MatGetFactor_elemental_elemental(). */
static PetscErrorCode MatLUFactorSymbolic_Elemental(Mat, Mat, IS, IS, const MatFactorInfo *)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
722 
723 static PetscErrorCode MatCholeskyFactor_Elemental(Mat A, IS, const MatFactorInfo *)
724 {
725   Mat_Elemental                                    *a = (Mat_Elemental *)A->data;
726   El::DistMatrix<PetscElemScalar, El::MC, El::STAR> d;
727 
728   PetscFunctionBegin;
729   El::Cholesky(El::UPPER, *a->emat);
730   A->factortype = MAT_FACTOR_CHOLESKY;
731   A->assembled  = PETSC_TRUE;
732 
733   PetscCall(PetscFree(A->solvertype));
734   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
735   PetscFunctionReturn(0);
736 }
737 
738 static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info)
739 {
740   PetscFunctionBegin;
741   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
742   PetscCall(MatCholeskyFactor_Elemental(F, 0, info));
743   PetscFunctionReturn(0);
744 }
745 
/* MatCholeskyFactorSymbolic_Elemental - Intentional no-op: dense storage needs no
   symbolic phase; F is created and allocated by MatGetFactor_elemental_elemental(). */
static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat, Mat, IS, const MatFactorInfo *)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
752 
/* MatFactorGetSolverType_elemental_elemental - Reports MATSOLVERELEMENTAL as the
   solver package backing this factored matrix. */
PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat, MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERELEMENTAL;
  PetscFunctionReturn(0);
}
759 
760 static PetscErrorCode MatGetFactor_elemental_elemental(Mat A, MatFactorType ftype, Mat *F)
761 {
762   Mat B;
763 
764   PetscFunctionBegin;
765   /* Create the factorization matrix */
766   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
767   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
768   PetscCall(MatSetType(B, MATELEMENTAL));
769   PetscCall(MatSetUp(B));
770   B->factortype      = ftype;
771   B->trivialsymbolic = PETSC_TRUE;
772   PetscCall(PetscFree(B->solvertype));
773   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &B->solvertype));
774 
775   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_elemental_elemental));
776   *F = B;
777   PetscFunctionReturn(0);
778 }
779 
780 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_Elemental(void)
781 {
782   PetscFunctionBegin;
783   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_LU, MatGetFactor_elemental_elemental));
784   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_CHOLESKY, MatGetFactor_elemental_elemental));
785   PetscFunctionReturn(0);
786 }
787 
788 static PetscErrorCode MatNorm_Elemental(Mat A, NormType type, PetscReal *nrm)
789 {
790   Mat_Elemental *a = (Mat_Elemental *)A->data;
791 
792   PetscFunctionBegin;
793   switch (type) {
794   case NORM_1:
795     *nrm = El::OneNorm(*a->emat);
796     break;
797   case NORM_FROBENIUS:
798     *nrm = El::FrobeniusNorm(*a->emat);
799     break;
800   case NORM_INFINITY:
801     *nrm = El::InfinityNorm(*a->emat);
802     break;
803   default:
804     SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 static PetscErrorCode MatZeroEntries_Elemental(Mat A)
810 {
811   Mat_Elemental *a = (Mat_Elemental *)A->data;
812 
813   PetscFunctionBegin;
814   El::Zero(*a->emat);
815   PetscFunctionReturn(0);
816 }
817 
/*
   MatGetOwnershipIS_Elemental - Builds index sets listing the PETSc global row
   (and/or column) indices whose entries are stored on this process.

   For each locally held Elemental row/column index (shift + i*stride over the
   local height/width), translate Elemental -> (rank, offset) via E2RO(), then
   -> PETSc global index via RO2P(). Either output IS may be NULL to skip it.
   The malloc'ed index array is handed to the IS (PETSC_OWN_POINTER).
*/
static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A, IS *rows, IS *cols)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  PetscInt       i, m, shift, stride, *idx;

  PetscFunctionBegin;
  if (rows) {
    m      = a->emat->LocalHeight();
    shift  = a->emat->ColShift();
    stride = a->emat->ColStride();
    PetscCall(PetscMalloc1(m, &idx));
    for (i = 0; i < m; i++) {
      PetscInt rank, offset;
      E2RO(A, 0, shift + i * stride, &rank, &offset);
      RO2P(A, 0, rank, offset, &idx[i]);
    }
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, rows));
  }
  if (cols) {
    m      = a->emat->LocalWidth();
    shift  = a->emat->RowShift();
    stride = a->emat->RowStride();
    PetscCall(PetscMalloc1(m, &idx));
    for (i = 0; i < m; i++) {
      PetscInt rank, offset;
      E2RO(A, 1, shift + i * stride, &rank, &offset);
      RO2P(A, 1, rank, offset, &idx[i]);
    }
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, cols));
  }
  PetscFunctionReturn(0);
}
850 
/* Convert a MATELEMENTAL matrix to MATDENSE by translating each locally owned entry's
   (PETSc index) <-> (rank,offset) <-> (Elemental index), reading it from the local
   Elemental buffer, and inserting it under its PETSc global indices. */
static PetscErrorCode MatConvert_Elemental_Dense(Mat A, MatType, MatReuse reuse, Mat *B)
{
  Mat             Bmpi;
  Mat_Elemental  *a = (Mat_Elemental *)A->data;
  MPI_Comm        comm;
  IS              isrows, iscols;
  PetscInt        rrank, ridx, crank, cidx, nrows, ncols, i, j, erow, ecol, elrow, elcol;
  const PetscInt *rows, *cols;
  PetscElemScalar v;
  const El::Grid &grid = a->emat->Grid();

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  if (reuse == MAT_REUSE_MATRIX) {
    Bmpi = *B;
  } else {
    PetscCall(MatCreate(comm, &Bmpi));
    PetscCall(MatSetSizes(Bmpi, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(Bmpi, MATDENSE));
    PetscCall(MatSetUp(Bmpi));
  }

  /* Get local entries of A: the IS pair describes which PETSc (row,col) indices this rank stores */
  PetscCall(MatGetOwnershipIS(A, &isrows, &iscols));
  PetscCall(ISGetLocalSize(isrows, &nrows));
  PetscCall(ISGetIndices(isrows, &rows));
  PetscCall(ISGetLocalSize(iscols, &ncols));
  PetscCall(ISGetIndices(iscols, &cols));

  /* The two branches differ only in loop nesting order, matching the storage orientation */
  if (a->roworiented) {
    for (i = 0; i < nrows; i++) {
      P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
      RO2E(A, 0, rrank, ridx, &erow);
      PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
      for (j = 0; j < ncols; j++) {
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");

        elrow = erow / grid.MCSize(); /* Elemental local row index */
        elcol = ecol / grid.MRSize(); /* Elemental local column index */
        v     = a->emat->GetLocal(elrow, elcol);
        PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
      }
    }
  } else { /* column-oriented */
    for (j = 0; j < ncols; j++) {
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);
      PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
      for (i = 0; i < nrows; i++) {
        P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
        RO2E(A, 0, rrank, ridx, &erow);
        PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");

        elrow = erow / grid.MCSize(); /* Elemental local row index */
        elcol = ecol / grid.MRSize(); /* Elemental local column index */
        v     = a->emat->GetLocal(elrow, elcol);
        PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
      }
    }
  }
  PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
  /* MAT_INPLACE_MATRIX replaces A itself; otherwise hand the new matrix back through B */
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &Bmpi));
  } else {
    *B = Bmpi;
  }
  PetscCall(ISDestroy(&isrows));
  PetscCall(ISDestroy(&iscols));
  PetscFunctionReturn(0);
}
925 
926 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
927 {
928   Mat                mat_elemental;
929   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols;
930   const PetscInt    *cols;
931   const PetscScalar *vals;
932 
933   PetscFunctionBegin;
934   if (reuse == MAT_REUSE_MATRIX) {
935     mat_elemental = *newmat;
936     PetscCall(MatZeroEntries(mat_elemental));
937   } else {
938     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
939     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
940     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
941     PetscCall(MatSetUp(mat_elemental));
942   }
943   for (row = 0; row < M; row++) {
944     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
945     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
946     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
947     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
948   }
949   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
950   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
951 
952   if (reuse == MAT_INPLACE_MATRIX) {
953     PetscCall(MatHeaderReplace(A, &mat_elemental));
954   } else {
955     *newmat = mat_elemental;
956   }
957   PetscFunctionReturn(0);
958 }
959 
960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
961 {
962   Mat                mat_elemental;
963   PetscInt           row, ncols, rstart = A->rmap->rstart, rend = A->rmap->rend, j;
964   const PetscInt    *cols;
965   const PetscScalar *vals;
966 
967   PetscFunctionBegin;
968   if (reuse == MAT_REUSE_MATRIX) {
969     mat_elemental = *newmat;
970     PetscCall(MatZeroEntries(mat_elemental));
971   } else {
972     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
973     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
974     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
975     PetscCall(MatSetUp(mat_elemental));
976   }
977   for (row = rstart; row < rend; row++) {
978     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
979     for (j = 0; j < ncols; j++) {
980       /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
981       PetscCall(MatSetValues(mat_elemental, 1, &row, 1, &cols[j], &vals[j], ADD_VALUES));
982     }
983     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
984   }
985   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
986   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
987 
988   if (reuse == MAT_INPLACE_MATRIX) {
989     PetscCall(MatHeaderReplace(A, &mat_elemental));
990   } else {
991     *newmat = mat_elemental;
992   }
993   PetscFunctionReturn(0);
994 }
995 
996 PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
997 {
998   Mat                mat_elemental;
999   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j;
1000   const PetscInt    *cols;
1001   const PetscScalar *vals;
1002 
1003   PetscFunctionBegin;
1004   if (reuse == MAT_REUSE_MATRIX) {
1005     mat_elemental = *newmat;
1006     PetscCall(MatZeroEntries(mat_elemental));
1007   } else {
1008     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1009     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
1010     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1011     PetscCall(MatSetUp(mat_elemental));
1012   }
1013   PetscCall(MatGetRowUpperTriangular(A));
1014   for (row = 0; row < M; row++) {
1015     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1016     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1017     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
1018     for (j = 0; j < ncols; j++) { /* lower triangular part */
1019       PetscScalar v;
1020       if (cols[j] == row) continue;
1021       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1022       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1023     }
1024     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1025   }
1026   PetscCall(MatRestoreRowUpperTriangular(A));
1027   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1028   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1029 
1030   if (reuse == MAT_INPLACE_MATRIX) {
1031     PetscCall(MatHeaderReplace(A, &mat_elemental));
1032   } else {
1033     *newmat = mat_elemental;
1034   }
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType, MatReuse reuse, Mat *newmat)
1039 {
1040   Mat                mat_elemental;
1041   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1042   const PetscInt    *cols;
1043   const PetscScalar *vals;
1044 
1045   PetscFunctionBegin;
1046   if (reuse == MAT_REUSE_MATRIX) {
1047     mat_elemental = *newmat;
1048     PetscCall(MatZeroEntries(mat_elemental));
1049   } else {
1050     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1051     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
1052     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
1053     PetscCall(MatSetUp(mat_elemental));
1054   }
1055   PetscCall(MatGetRowUpperTriangular(A));
1056   for (row = rstart; row < rend; row++) {
1057     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1058     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1059     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
1060     for (j = 0; j < ncols; j++) { /* lower triangular part */
1061       PetscScalar v;
1062       if (cols[j] == row) continue;
1063       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1064       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1065     }
1066     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1067   }
1068   PetscCall(MatRestoreRowUpperTriangular(A));
1069   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1070   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1071 
1072   if (reuse == MAT_INPLACE_MATRIX) {
1073     PetscCall(MatHeaderReplace(A, &mat_elemental));
1074   } else {
1075     *newmat = mat_elemental;
1076   }
1077   PetscFunctionReturn(0);
1078 }
1079 
/* Destroy a MATELEMENTAL matrix: free the Elemental objects, drop this matrix's
   reference on the El::Grid cached on the communicator, and clear composed methods. */
static PetscErrorCode MatDestroy_Elemental(Mat A)
{
  Mat_Elemental      *a = (Mat_Elemental *)A->data;
  Mat_Elemental_Grid *commgrid;
  PetscBool           flg;
  MPI_Comm            icomm;

  PetscFunctionBegin;
  /* a->P and a->Q are deleted unconditionally (delete on NULL is a no-op);
     presumably factorization-related data owned by this type — see the factor code */
  delete a->emat;
  delete a->P;
  delete a->Q;

  /* Look up the shared grid attached to the (duplicated inner) communicator; the grid and
     the keyval are torn down only when the last MATELEMENTAL on this comm is destroyed */
  El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
  PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
  /* NOTE(review): flg is not checked; relies on MatCreate_Elemental() always having set the attribute */
  if (--commgrid->grid_refct == 0) {
    delete commgrid->grid;
    PetscCall(PetscFree(commgrid));
    PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Elemental_keyval));
  }
  PetscCall(PetscCommDestroy(&icomm));
  /* Remove the methods composed in MatGetFactor/MatCreate so the object no longer advertises them */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}
1107 
/* Set up a MATELEMENTAL matrix: validate that the PETSc row/column layouts match what
   PetscSplitOwnership() would produce, size the Elemental matrix, and precompute the
   block-cyclic translation parameters (commsize, m[], mr[]) used by the index converters. */
PetscErrorCode MatSetUp_Elemental(Mat A)
{
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  MPI_Comm       comm;
  PetscMPIInt    rsize, csize;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* Check if local row and column sizes are equally distributed.
     Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
     exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (like would be returned by
     PetscSplitOwnership(comm,&n,&N), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnership(comm, &n, &A->rmap->N));
  PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local row size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->rmap->n);

  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnership(comm, &n, &A->cmap->N));
  PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local column size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->cmap->n);

  /* Size the distributed Elemental matrix and clear it */
  a->emat->Resize(A->rmap->N, A->cmap->N);
  El::Zero(*a->emat);

  PetscCallMPI(MPI_Comm_size(A->rmap->comm, &rsize));
  PetscCallMPI(MPI_Comm_size(A->cmap->comm, &csize));
  PetscCheck(csize == rsize, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Cannot use row and column communicators of different sizes");
  a->commsize = rsize;
  /* mr[] = N % size, remapped so a zero remainder becomes size; m[] = ceil-style local block size.
     These feed the P2RO/RO2P/E2RO/RO2E index translations between PETSc block and Elemental cyclic orderings. */
  a->mr[0]    = A->rmap->N % rsize;
  if (!a->mr[0]) a->mr[0] = rsize;
  a->mr[1] = A->cmap->N % csize;
  if (!a->mr[1]) a->mr[1] = csize;
  a->m[0] = A->rmap->N / rsize + (a->mr[0] != rsize);
  a->m[1] = A->cmap->N / csize + (a->mr[1] != csize);
  PetscFunctionReturn(0);
}
1147 
1148 PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType)
1149 {
1150   Mat_Elemental *a = (Mat_Elemental *)A->data;
1151 
1152   PetscFunctionBegin;
1153   /* printf("Calling ProcessQueues\n"); */
1154   a->emat->ProcessQueues();
1155   /* printf("Finished ProcessQueues\n"); */
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 PetscErrorCode MatAssemblyEnd_Elemental(Mat, MatAssemblyType)
1160 {
1161   PetscFunctionBegin;
1162   /* Currently does nothing */
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer)
1167 {
1168   Mat      Adense, Ae;
1169   MPI_Comm comm;
1170 
1171   PetscFunctionBegin;
1172   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1173   PetscCall(MatCreate(comm, &Adense));
1174   PetscCall(MatSetType(Adense, MATDENSE));
1175   PetscCall(MatLoad(Adense, viewer));
1176   PetscCall(MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX, &Ae));
1177   PetscCall(MatDestroy(&Adense));
1178   PetscCall(MatHeaderReplace(newMat, &Ae));
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 /* -------------------------------------------------------------------*/
/* Function table for MATELEMENTAL. Entries are positional within struct _MatOps;
   the numbered markers give the slot index, and 0 marks operations this type
   does not implement (the Mat core then errors or falls back as appropriate). */
static struct _MatOps MatOps_Values = {MatSetValues_Elemental,
                                       0,
                                       0,
                                       MatMult_Elemental,
                                       /* 4*/ MatMultAdd_Elemental,
                                       MatMultTranspose_Elemental,
                                       MatMultTransposeAdd_Elemental,
                                       MatSolve_Elemental,
                                       MatSolveAdd_Elemental,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_Elemental,
                                       MatCholeskyFactor_Elemental,
                                       0,
                                       MatTranspose_Elemental,
                                       /*15*/ MatGetInfo_Elemental,
                                       0,
                                       MatGetDiagonal_Elemental,
                                       MatDiagonalScale_Elemental,
                                       MatNorm_Elemental,
                                       /*20*/ MatAssemblyBegin_Elemental,
                                       MatAssemblyEnd_Elemental,
                                       MatSetOption_Elemental,
                                       MatZeroEntries_Elemental,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_Elemental,
                                       MatLUFactorNumeric_Elemental,
                                       MatCholeskyFactorSymbolic_Elemental,
                                       MatCholeskyFactorNumeric_Elemental,
                                       /*29*/ MatSetUp_Elemental,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_Elemental,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_Elemental,
                                       0,
                                       0,
                                       0,
                                       MatCopy_Elemental,
                                       /*44*/ 0,
                                       MatScale_Elemental,
                                       MatShift_Basic,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_Elemental,
                                       MatView_Elemental,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_Elemental_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_Elemental,
                                       /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_Elemental,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_Elemental,
                                       0,
                                       /*99*/ MatProductSetFromOptions_Elemental,
                                       0,
                                       0,
                                       MatConjugate_Elemental,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_Elemental,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_Elemental,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_Elemental,
                                       0,
                                       0,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0,
                                       0};
1335 
1336 /*MC
1337    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package
1338 
1339   Use ./configure --download-elemental to install PETSc to use Elemental
1340 
1341    Options Database Keys:
1342 + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1343 . -pc_factor_mat_solver_type elemental - to use this direct solver with the option -pc_type lu
1344 - -mat_elemental_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1345 
1346   Level: beginner
1347 
1348   Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1350    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
1351    the given rank.
1352 
1353 .seealso: `MATDENSE`, `MATSCALAPACK`, `MatGetOwnershipIS()`
1354 M*/
1355 
/* Create a MATELEMENTAL matrix: install the function table, allocate the type-specific
   data, and attach (or reuse) a reference-counted El::Grid cached as an MPI attribute
   on the communicator so all MATELEMENTAL matrices on the same comm share one grid. */
PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A)
{
  Mat_Elemental      *a;
  PetscBool           flg, flg1;
  Mat_Elemental_Grid *commgrid;
  MPI_Comm            icomm;
  PetscInt            optv1;

  PetscFunctionBegin;
  PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
  A->insertmode = NOT_SET_VALUES;

  PetscCall(PetscNew(&a));
  A->data = (void *)a;

  /* Set up the elemental matrix */
  El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
    /* First MATELEMENTAL ever created: register the keyval and the citation */
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_Elemental_keyval, (void *)0));
    PetscCall(PetscCitationsRegister(ElementalCitation, &ElementalCite));
  }
  PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
  if (!flg) {
    /* No grid cached on this communicator yet: create one, optionally with a user-chosen height */
    PetscCall(PetscNew(&commgrid));

    PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "Elemental Options", "Mat");
    /* displayed default grid sizes (CommSize,1) are set by us arbitrarily until El::Grid() is called */
    PetscCall(PetscOptionsInt("-mat_elemental_grid_height", "Grid Height", "None", El::mpi::Size(cxxcomm), &optv1, &flg1));
    if (flg1) {
      PetscCheck((El::mpi::Size(cxxcomm) % optv1) == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %" PetscInt_FMT, optv1, (PetscInt)El::mpi::Size(cxxcomm));
      commgrid->grid = new El::Grid(cxxcomm, optv1); /* use user-provided grid height */
    } else {
      commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
      /* printf("new commgrid->grid = %p\n",commgrid->grid);  -- memory leak revealed by valgrind? */
    }
    commgrid->grid_refct = 1;
    PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_Elemental_keyval, (void *)commgrid));

    a->pivoting = 1;
    PetscCall(PetscOptionsInt("-mat_elemental_pivoting", "Pivoting", "None", a->pivoting, &a->pivoting, NULL));

    PetscOptionsEnd();
  } else {
    /* Reuse the cached grid; the matching decrement is in MatDestroy_Elemental() */
    commgrid->grid_refct++;
  }
  PetscCall(PetscCommDestroy(&icomm));
  a->grid        = commgrid->grid;
  a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_Elemental));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", MatProductSetFromOptions_Elemental_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATELEMENTAL));
  PetscFunctionReturn(0);
}
1414