xref: /petsc/src/mat/impls/elemental/matelem.cxx (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 #include <petsc/private/petscelemental.h>
2 
3 const char       ElementalCitation[] = "@Article{Elemental2012,\n"
4                                        "  author  = {Jack Poulson and Bryan Marker and Jeff R. Hammond and Nichols A. Romero and Robert {v}an~{d}e~{G}eijn},\n"
5                                        "  title   = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations},\n"
6                                        "  journal = {{ACM} Transactions on Mathematical Software},\n"
7                                        "  volume  = {39},\n"
8                                        "  number  = {2},\n"
9                                        "  year    = {2013}\n"
10                                        "}\n";
11 static PetscBool ElementalCite       = PETSC_FALSE;
12 
13 /*
14     The variable Petsc_Elemental_keyval is used to indicate an MPI attribute that
15   is attached to a communicator, in this case the attribute is a Mat_Elemental_Grid
16 */
17 static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;
18 
/* Display an Elemental matrix through a PetscViewer.
   ASCII_INFO prints run parameters; PETSC_VIEWER_DEFAULT prints the entries in
   Elemental's cyclic ordering plus (for unfactored matrices) a dense copy in
   PETSc ordering; non-ASCII viewers receive a MATDENSE conversion. */
static PetscErrorCode MatView_Elemental(Mat A, PetscViewer viewer) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  PetscBool      iascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscViewerFormat format;
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO) {
      /* call elemental viewing function */
      PetscCall(PetscViewerASCIIPrintf(viewer, "Elemental run parameters:\n"));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  allocated entries=%zu\n", (*a->emat).AllocatedMemory()));
      PetscCall(PetscViewerASCIIPrintf(viewer, "  grid height=%d, grid width=%d\n", (*a->emat).Grid().Height(), (*a->emat).Grid().Width()));
      /* NOTE(review): this branch is unreachable -- format cannot equal both
         PETSC_VIEWER_ASCII_INFO (outer test) and PETSC_VIEWER_ASCII_FACTOR_INFO.
         Confirm whether FACTOR_INFO handling was intended at the outer level. */
      if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
        /* call elemental viewing function */
        PetscCall(PetscPrintf(PetscObjectComm((PetscObject)viewer), "test matview_elemental 2\n"));
      }

    } else if (format == PETSC_VIEWER_DEFAULT) {
      /* disable tabbing so El::Print() controls its own layout */
      PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
      El::Print(*a->emat, "Elemental matrix (cyclic ordering)");
      PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
      if (A->factortype == MAT_FACTOR_NONE) {
        /* also show the entries in PETSc's natural ordering via a dense copy */
        Mat Adense;
        PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
        PetscCall(MatView(Adense, viewer));
        PetscCall(MatDestroy(&Adense));
      }
    } else SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Format");
  } else {
    /* convert to dense format and call MatView() */
    Mat Adense;
    PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
    PetscCall(MatView(Adense, viewer));
    PetscCall(MatDestroy(&Adense));
  }
  PetscFunctionReturn(0);
}
58 
/* Fill a MatInfo record.  Elemental storage is dense, so "nonzeros" are taken
   to mean the locally allocated entries reported by Elemental. */
static PetscErrorCode MatGetInfo_Elemental(Mat A, MatInfoType flag, MatInfo *info) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;

  PetscFunctionBegin;
  info->block_size = 1.0;

  if (flag == MAT_LOCAL) {
    info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
    info->nz_used      = info->nz_allocated;
  } else if (flag == MAT_GLOBAL_MAX) {
    /* MAT_GLOBAL_MAX is not implemented: the fields are left untouched */
    //PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin)));
    /* see MatGetInfo_MPIAIJ() for getting global info->nz_allocated! */
    //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_MAX not written yet");
  } else if (flag == MAT_GLOBAL_SUM) {
    /* NOTE(review): no MPI reduction is performed here, so despite the
       MAT_GLOBAL_SUM request this reports the LOCAL allocation only -- confirm
       this is the intended approximation (see commented-out Allreduce below) */
    //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_SUM not written yet");
    info->nz_allocated = (*a->emat).AllocatedMemory(); /* locally allocated */
    info->nz_used      = info->nz_allocated;           /* assume Elemental does accurate allocation */
    //PetscCall(MPIU_Allreduce(isend,irecv,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
    //PetscPrintf(PETSC_COMM_SELF,"    ... [%d] locally allocated %g\n",rank,info->nz_allocated);
  }

  info->nz_unneeded       = 0.0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = ((PetscObject)A)->mem;
  info->fill_ratio_given  = 0; /* determined by Elemental */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
89 
90 PetscErrorCode MatSetOption_Elemental(Mat A, MatOption op, PetscBool flg) {
91   Mat_Elemental *a = (Mat_Elemental *)A->data;
92 
93   PetscFunctionBegin;
94   switch (op) {
95   case MAT_NEW_NONZERO_LOCATIONS:
96   case MAT_NEW_NONZERO_LOCATION_ERR:
97   case MAT_NEW_NONZERO_ALLOCATION_ERR:
98   case MAT_SYMMETRIC:
99   case MAT_SORTED_FULL:
100   case MAT_HERMITIAN: break;
101   case MAT_ROW_ORIENTED: a->roworiented = flg; break;
102   default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %s", MatOptions[op]);
103   }
104   PetscFunctionReturn(0);
105 }
106 
/* Insert or add a logically dense block of values into A.
   The algorithm makes two passes over the (row,col) pairs:
     1. apply every update that targets a locally owned Elemental entry and
        count the off-process ones,
     2. Reserve() queue space and QueueUpdate() each off-process entry (the
        queue is flushed later during matrix assembly).
   Off-process entries support ADD_VALUES only.  Index translation goes
   PETSc ordering -> (rank,offset) -> Elemental ordering via P2RO()/RO2E().
   Negative row/column indices are skipped (standard PETSc convention).
   a->roworiented selects whether vals is traversed row- or column-major. */
static PetscErrorCode MatSetValues_Elemental(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  PetscInt       i, j, rrank, ridx, crank, cidx, erow, ecol, numQueues = 0;

  PetscFunctionBegin;
  // TODO: Initialize matrix to all zeros?

  // Count the number of queues from this process
  if (a->roworiented) {
    /* pass 1: local updates + off-process count (row-major vals layout) */
    for (i = 0; i < nr; i++) {
      if (rows[i] < 0) continue;
      P2RO(A, 0, rows[i], &rrank, &ridx);
      RO2E(A, 0, rrank, ridx, &erow);
      PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
      for (j = 0; j < nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
        if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES: a->emat->Set(erow, ecol, (PetscElemScalar)vals[i * nc + j]); break;
        case ADD_VALUES: a->emat->Update(erow, ecol, (PetscElemScalar)vals[i * nc + j]); break;
        default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
        }
      }
    }

    /* pass 2: queue the off-process updates (translation already validated) */
    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve(numQueues);
    for (i = 0; i < nr; i++) {
      if (rows[i] < 0) continue;
      P2RO(A, 0, rows[i], &rrank, &ridx);
      RO2E(A, 0, rrank, ridx, &erow);
      for (j = 0; j < nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A, 1, cols[j], &crank, &cidx);
        RO2E(A, 1, crank, cidx, &ecol);
        if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate(erow, ecol, vals[i * nc + j]);
        }
      }
    }
  } else { /* columnoriented */
    /* same two-pass scheme with vals traversed column-major (vals[i + j*nr]) */
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);
      PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect col translation");
      for (i = 0; i < nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A, 0, rows[i], &rrank, &ridx);
        RO2E(A, 0, rrank, ridx, &erow);
        PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Incorrect row translation");
        if (!a->emat->IsLocal(erow, ecol)) { /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          PetscCheck(imode == ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES: a->emat->Set(erow, ecol, (PetscElemScalar)vals[i + j * nr]); break;
        case ADD_VALUES: a->emat->Update(erow, ecol, (PetscElemScalar)vals[i + j * nr]); break;
        default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
        }
      }
    }

    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve(numQueues);
    for (j = 0; j < nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A, 1, cols[j], &crank, &cidx);
      RO2E(A, 1, crank, cidx, &ecol);

      for (i = 0; i < nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A, 0, rows[i], &rrank, &ridx);
        RO2E(A, 0, rrank, ridx, &erow);
        if (!a->emat->IsLocal(erow, ecol)) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate(erow, ecol, vals[i + j * nr]);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
203 
204 static PetscErrorCode MatMult_Elemental(Mat A, Vec X, Vec Y) {
205   Mat_Elemental         *a = (Mat_Elemental *)A->data;
206   const PetscElemScalar *x;
207   PetscElemScalar       *y;
208   PetscElemScalar        one = 1, zero = 0;
209 
210   PetscFunctionBegin;
211   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
212   PetscCall(VecGetArray(Y, (PetscScalar **)&y));
213   { /* Scoping so that constructor is called before pointer is returned */
214     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
215     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
216     ye.Attach(A->rmap->N, 1, *a->grid, 0, 0, y, A->rmap->n);
217     El::Gemv(El::NORMAL, one, *a->emat, xe, zero, ye);
218   }
219   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
220   PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
221   PetscFunctionReturn(0);
222 }
223 
/* Y = A^T * X.  Note the layout roles are swapped relative to MatMult():
   the input is attached with A's row layout and the output with A's
   column layout. */
static PetscErrorCode MatMultTranspose_Elemental(Mat A, Vec X, Vec Y) {
  Mat_Elemental         *a = (Mat_Elemental *)A->data;
  const PetscElemScalar *x;
  PetscElemScalar       *y;
  PetscElemScalar        one = 1, zero = 0;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecGetArray(Y, (PetscScalar **)&y));
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ye;
    xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
    ye.Attach(A->cmap->N, 1, *a->grid, 0, 0, y, A->cmap->n);
    El::Gemv(El::TRANSPOSE, one, *a->emat, xe, zero, ye);
  }
  PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecRestoreArray(Y, (PetscScalar **)&y));
  PetscFunctionReturn(0);
}
243 
244 static PetscErrorCode MatMultAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z) {
245   Mat_Elemental         *a = (Mat_Elemental *)A->data;
246   const PetscElemScalar *x;
247   PetscElemScalar       *z;
248   PetscElemScalar        one = 1;
249 
250   PetscFunctionBegin;
251   if (Y != Z) PetscCall(VecCopy(Y, Z));
252   PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
253   PetscCall(VecGetArray(Z, (PetscScalar **)&z));
254   { /* Scoping so that constructor is called before pointer is returned */
255     El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
256     xe.LockedAttach(A->cmap->N, 1, *a->grid, 0, 0, x, A->cmap->n);
257     ze.Attach(A->rmap->N, 1, *a->grid, 0, 0, z, A->rmap->n);
258     El::Gemv(El::NORMAL, one, *a->emat, xe, one, ze);
259   }
260   PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
261   PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
262   PetscFunctionReturn(0);
263 }
264 
/* Z = Y + A^T * X: copy Y into Z (when distinct), then accumulate A^T*X with
   a transposed GEMV (beta = 1).  Layout roles are swapped as in
   MatMultTranspose(). */
static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A, Vec X, Vec Y, Vec Z) {
  Mat_Elemental         *a = (Mat_Elemental *)A->data;
  const PetscElemScalar *x;
  PetscElemScalar       *z;
  PetscElemScalar        one = 1;

  PetscFunctionBegin;
  if (Y != Z) PetscCall(VecCopy(Y, Z));
  PetscCall(VecGetArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecGetArray(Z, (PetscScalar **)&z));
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe, ze;
    xe.LockedAttach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
    ze.Attach(A->cmap->N, 1, *a->grid, 0, 0, z, A->cmap->n);
    El::Gemv(El::TRANSPOSE, one, *a->emat, xe, one, ze);
  }
  PetscCall(VecRestoreArrayRead(X, (const PetscScalar **)&x));
  PetscCall(VecRestoreArray(Z, (PetscScalar **)&z));
  PetscFunctionReturn(0);
}
285 
286 PetscErrorCode MatMatMultNumeric_Elemental(Mat A, Mat B, Mat C) {
287   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
288   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
289   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
290   PetscElemScalar one = 1, zero = 0;
291 
292   PetscFunctionBegin;
293   { /* Scoping so that constructor is called before pointer is returned */
294     El::Gemm(El::NORMAL, El::NORMAL, one, *a->emat, *b->emat, zero, *c->emat);
295   }
296   C->assembled = PETSC_TRUE;
297   PetscFunctionReturn(0);
298 }
299 
/* Symbolic phase of C = A*B: size, type, and set up the (already created)
   product matrix and install the numeric routine.  fill is ignored since the
   product is dense. */
PetscErrorCode MatMatMultSymbolic_Elemental(Mat A, Mat B, PetscReal fill, Mat Ce) {
  PetscFunctionBegin;
  PetscCall(MatSetSizes(Ce, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(Ce, MATELEMENTAL));
  PetscCall(MatSetUp(Ce));
  Ce->ops->matmultnumeric = MatMatMultNumeric_Elemental;
  PetscFunctionReturn(0);
}
308 
309 static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A, Mat B, Mat C) {
310   Mat_Elemental  *a   = (Mat_Elemental *)A->data;
311   Mat_Elemental  *b   = (Mat_Elemental *)B->data;
312   Mat_Elemental  *c   = (Mat_Elemental *)C->data;
313   PetscElemScalar one = 1, zero = 0;
314 
315   PetscFunctionBegin;
316   { /* Scoping so that constructor is called before pointer is returned */
317     El::Gemm(El::NORMAL, El::TRANSPOSE, one, *a->emat, *b->emat, zero, *c->emat);
318   }
319   C->assembled = PETSC_TRUE;
320   PetscFunctionReturn(0);
321 }
322 
/* Symbolic phase of C = A*B^T: C is sized rmap(A) x rmap(B).  fill is
   ignored since the product is dense. */
static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATELEMENTAL));
  PetscCall(MatSetUp(C));
  PetscFunctionReturn(0);
}
330 
331 /* --------------------------------------- */
/* Wire the AB (C = A*B) path into the generic MatProduct framework. */
static PetscErrorCode MatProductSetFromOptions_Elemental_AB(Mat C) {
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
338 
/* Wire the ABt (C = A*B^T) path into the generic MatProduct framework. */
static PetscErrorCode MatProductSetFromOptions_Elemental_ABt(Mat C) {
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_Elemental;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
345 
346 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Elemental(Mat C) {
347   Mat_Product *product = C->product;
348 
349   PetscFunctionBegin;
350   switch (product->type) {
351   case MATPRODUCT_AB: PetscCall(MatProductSetFromOptions_Elemental_AB(C)); break;
352   case MATPRODUCT_ABt: PetscCall(MatProductSetFromOptions_Elemental_ABt(C)); break;
353   default: break;
354   }
355   PetscFunctionReturn(0);
356 }
357 
/* C = A*B with A Elemental and B MPIDENSE: convert B to Elemental, multiply
   there, and convert the product back into the caller-provided dense C. */
PetscErrorCode MatMatMultNumeric_Elemental_MPIDense(Mat A, Mat B, Mat C) {
  Mat Be, Ce;

  PetscFunctionBegin;
  PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &Be));
  PetscCall(MatMatMult(A, Be, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ce));
  /* MAT_REUSE_MATRIX fills the existing C in place */
  PetscCall(MatConvert(Ce, MATMPIDENSE, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Be));
  PetscCall(MatDestroy(&Ce));
  PetscFunctionReturn(0);
}
369 
/* Symbolic phase for the Elemental * MPIDense product: size/type the result
   as MPIDENSE and install the conversion-based numeric routine.  fill is
   ignored (dense result). */
PetscErrorCode MatMatMultSymbolic_Elemental_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscFunctionBegin;
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(C, MATMPIDENSE));
  PetscCall(MatSetUp(C));
  C->ops->matmultnumeric = MatMatMultNumeric_Elemental_MPIDense;
  PetscFunctionReturn(0);
}
378 
/* Wire the AB path for the Elemental * MPIDense product. */
PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense_AB(Mat C) {
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
385 
/* MatProduct dispatch for Elemental * MPIDense: only C = A*B is supported;
   other product types are left for the generic layer to reject. */
PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense(Mat C) {
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_Elemental_MPIDense_AB(C));
  PetscFunctionReturn(0);
}
393 /* --------------------------------------- */
394 
/* Extract the diagonal of A (min(nrows,ncols) entries) into vector D.
   Each PETSc index i is translated to Elemental coordinates via
   P2RO()/RO2E() before the entry is fetched.
   NOTE(review): every rank loops over the full diagonal and calls
   a->emat->Get() per entry -- presumably a collective/communicating query;
   confirm this cost is acceptable for large matrices. */
static PetscErrorCode MatGetDiagonal_Elemental(Mat A, Vec D) {
  PetscInt        i, nrows, ncols, nD, rrank, ridx, crank, cidx;
  Mat_Elemental  *a = (Mat_Elemental *)A->data;
  PetscElemScalar v;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetSize(A, &nrows, &ncols));
  nD = nrows > ncols ? ncols : nrows;
  for (i = 0; i < nD; i++) {
    PetscInt erow, ecol;
    P2RO(A, 0, i, &rrank, &ridx);
    RO2E(A, 0, rrank, ridx, &erow);
    PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
    P2RO(A, 1, i, &crank, &cidx);
    RO2E(A, 1, crank, cidx, &ecol);
    PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
    v = a->emat->Get(erow, ecol);
    PetscCall(VecSetValues(D, 1, &i, (PetscScalar *)&v, INSERT_VALUES));
  }
  PetscCall(VecAssemblyBegin(D));
  PetscCall(VecAssemblyEnd(D));
  PetscFunctionReturn(0);
}
420 
/* X <- diag(L) * X * diag(R).  Either vector may be NULL to skip that side.
   Each scaling vector is attached (read-only) as a [VC,STAR] column vector
   and applied in place by Elemental's DiagonalScale. */
static PetscErrorCode MatDiagonalScale_Elemental(Mat X, Vec L, Vec R) {
  Mat_Elemental         *x = (Mat_Elemental *)X->data;
  const PetscElemScalar *d;

  PetscFunctionBegin;
  if (R) {
    /* right scaling uses the column layout of X */
    PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
    de.LockedAttach(X->cmap->N, 1, *x->grid, 0, 0, d, X->cmap->n);
    El::DiagonalScale(El::RIGHT, El::NORMAL, de, *x->emat);
    PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
  }
  if (L) {
    /* left scaling uses the row layout of X */
    PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
    El::DistMatrix<PetscElemScalar, El::VC, El::STAR> de;
    de.LockedAttach(X->rmap->N, 1, *x->grid, 0, 0, d, X->rmap->n);
    El::DiagonalScale(El::LEFT, El::NORMAL, de, *x->emat);
    PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
  }
  PetscFunctionReturn(0);
}
442 
/* Dense storage holds every entry, so no diagonal entry is ever missing.
   The index output d is never written (it is only meaningful when *missing
   would be PETSC_TRUE). */
static PetscErrorCode MatMissingDiagonal_Elemental(Mat A, PetscBool *missing, PetscInt *d) {
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
448 
449 static PetscErrorCode MatScale_Elemental(Mat X, PetscScalar a) {
450   Mat_Elemental *x = (Mat_Elemental *)X->data;
451 
452   PetscFunctionBegin;
453   El::Scale((PetscElemScalar)a, *x->emat);
454   PetscFunctionReturn(0);
455 }
456 
457 /*
458   MatAXPY - Computes Y = a*X + Y.
459 */
460 static PetscErrorCode MatAXPY_Elemental(Mat Y, PetscScalar a, Mat X, MatStructure str) {
461   Mat_Elemental *x = (Mat_Elemental *)X->data;
462   Mat_Elemental *y = (Mat_Elemental *)Y->data;
463 
464   PetscFunctionBegin;
465   El::Axpy((PetscElemScalar)a, *x->emat, *y->emat);
466   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
467   PetscFunctionReturn(0);
468 }
469 
470 static PetscErrorCode MatCopy_Elemental(Mat A, Mat B, MatStructure str) {
471   Mat_Elemental *a = (Mat_Elemental *)A->data;
472   Mat_Elemental *b = (Mat_Elemental *)B->data;
473 
474   PetscFunctionBegin;
475   El::Copy(*a->emat, *b->emat);
476   PetscCall(PetscObjectStateIncrease((PetscObject)B));
477   PetscFunctionReturn(0);
478 }
479 
/* Create a new Elemental matrix with the same layout as A.  Entries are
   copied only when op == MAT_COPY_VALUES; otherwise the new matrix keeps
   whatever MatSetUp() produced. */
static PetscErrorCode MatDuplicate_Elemental(Mat A, MatDuplicateOption op, Mat *B) {
  Mat            Be;
  MPI_Comm       comm;
  Mat_Elemental *a = (Mat_Elemental *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatCreate(comm, &Be));
  PetscCall(MatSetSizes(Be, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(Be, MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  *B = Be;
  if (op == MAT_COPY_VALUES) {
    Mat_Elemental *b = (Mat_Elemental *)Be->data;
    El::Copy(*a->emat, *b->emat);
  }
  Be->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
499 
/* B = A^T.  MAT_INITIAL_MATRIX creates B with transposed layout;
   MAT_REUSE_MATRIX overwrites the caller's B after a nonzero-state check;
   MAT_INPLACE_MATRIX is rejected. */
static PetscErrorCode MatTranspose_Elemental(Mat A, MatReuse reuse, Mat *B) {
  Mat            Be = *B;
  MPI_Comm       comm;
  Mat_Elemental *a = (Mat_Elemental *)A->data, *b;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* Only out-of-place supported */
  PetscCheck(reuse != MAT_INPLACE_MATRIX, comm, PETSC_ERR_SUP, "Only out-of-place supported");
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreate(comm, &Be));
    PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
    PetscCall(MatSetType(Be, MATELEMENTAL));
    PetscCall(MatSetUp(Be));
    *B = Be;
  }
  b = (Mat_Elemental *)Be->data;
  El::Transpose(*a->emat, *b->emat);
  Be->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
522 
523 static PetscErrorCode MatConjugate_Elemental(Mat A) {
524   Mat_Elemental *a = (Mat_Elemental *)A->data;
525 
526   PetscFunctionBegin;
527   El::Conjugate(*a->emat);
528   PetscFunctionReturn(0);
529 }
530 
531 static PetscErrorCode MatHermitianTranspose_Elemental(Mat A, MatReuse reuse, Mat *B) {
532   Mat            Be = *B;
533   MPI_Comm       comm;
534   Mat_Elemental *a = (Mat_Elemental *)A->data, *b;
535 
536   PetscFunctionBegin;
537   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
538   /* Only out-of-place supported */
539   if (reuse == MAT_INITIAL_MATRIX) {
540     PetscCall(MatCreate(comm, &Be));
541     PetscCall(MatSetSizes(Be, A->cmap->n, A->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
542     PetscCall(MatSetType(Be, MATELEMENTAL));
543     PetscCall(MatSetUp(Be));
544     *B = Be;
545   }
546   b = (Mat_Elemental *)Be->data;
547   El::Adjoint(*a->emat, *b->emat);
548   Be->assembled = PETSC_TRUE;
549   PetscFunctionReturn(0);
550 }
551 
/* Solve A x = b using the previously computed factorization stored in A.
   b is copied into X, the X array is attached as a [VC,STAR] vector,
   redistributed to [MC,MR] for Elemental's triangular solves, then copied
   back into the attached view (and thus into X).
   a->pivoting selects the LU variant: 0 = no pivoting, 1 = partial (P),
   2 = full (P and Q). */
static PetscErrorCode MatSolve_Elemental(Mat A, Vec B, Vec X) {
  Mat_Elemental   *a = (Mat_Elemental *)A->data;
  PetscElemScalar *x;
  PetscInt         pivoting = a->pivoting;

  PetscFunctionBegin;
  PetscCall(VecCopy(B, X));
  PetscCall(VecGetArray(X, (PetscScalar **)&x));

  El::DistMatrix<PetscElemScalar, El::VC, El::STAR> xe;
  xe.Attach(A->rmap->N, 1, *a->grid, 0, 0, x, A->rmap->n);
  El::DistMatrix<PetscElemScalar, El::MC, El::MR> xer(xe);
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, xer);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, xer);
    } else { /* pivoting == 2 */
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, xer);
    }
    break;
  case MAT_FACTOR_CHOLESKY: El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, xer); break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); break;
  }
  /* copy the redistributed solution back into the view of X's array */
  El::Copy(xer, xe);

  PetscCall(VecRestoreArray(X, (PetscScalar **)&x));
  PetscFunctionReturn(0);
}
582 
/* X = A^{-1} B + Y: a factored solve followed by an axpy. */
static PetscErrorCode MatSolveAdd_Elemental(Mat A, Vec B, Vec Y, Vec X) {
  PetscFunctionBegin;
  PetscCall(MatSolve_Elemental(A, B, X));
  PetscCall(VecAXPY(X, 1, Y));
  PetscFunctionReturn(0);
}
589 
/* Solve A X = B for multiple right-hand sides using A's factorization.
   If X is not MATELEMENTAL, B is converted to a temporary Elemental matrix C,
   the solve happens in C, and C is converted back into X (same type as X).
   If X is MATELEMENTAL, B's entries are copied into X and the solve runs in
   place there.
   NOTE(review): the in-place branch casts B->data to Mat_Elemental, i.e. it
   assumes B is also MATELEMENTAL whenever X is -- confirm callers guarantee
   this. */
static PetscErrorCode MatMatSolve_Elemental(Mat A, Mat B, Mat X) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  Mat_Elemental *x;
  Mat            C;
  PetscInt       pivoting = a->pivoting;
  PetscBool      flg;
  MatType        type;

  PetscFunctionBegin;
  PetscCall(MatGetType(X, &type));
  PetscCall(PetscStrcmp(type, MATELEMENTAL, &flg));
  if (!flg) {
    PetscCall(MatConvert(B, MATELEMENTAL, MAT_INITIAL_MATRIX, &C));
    x = (Mat_Elemental *)C->data;
  } else {
    x = (Mat_Elemental *)X->data;
    El::Copy(*((Mat_Elemental *)B->data)->emat, *x->emat);
  }
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    /* pivoting: 0 = none, 1 = partial (P), 2 = full (P,Q) */
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *x->emat);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *x->emat);
    } else {
      El::lu::SolveAfter(El::NORMAL, *a->emat, *a->P, *a->Q, *x->emat);
    }
    break;
  case MAT_FACTOR_CHOLESKY: El::cholesky::SolveAfter(El::UPPER, El::NORMAL, *a->emat, *x->emat); break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType"); break;
  }
  if (!flg) {
    /* move the solution from the temporary Elemental matrix back into X */
    PetscCall(MatConvert(C, type, MAT_REUSE_MATRIX, &X));
    PetscCall(MatDestroy(&C));
  }
  PetscFunctionReturn(0);
}
627 
/* In-place LU factorization of A.  The row/col IS arguments are ignored;
   pivoting is selected by a->pivoting instead: 0 = none, 1 = partial
   (permutation P), 2 = full (P and Q). */
static PetscErrorCode MatLUFactor_Elemental(Mat A, IS row, IS col, const MatFactorInfo *info) {
  Mat_Elemental *a        = (Mat_Elemental *)A->data;
  PetscInt       pivoting = a->pivoting;

  PetscFunctionBegin;
  if (pivoting == 0) {
    El::LU(*a->emat);
  } else if (pivoting == 1) {
    El::LU(*a->emat, *a->P);
  } else {
    El::LU(*a->emat, *a->P, *a->Q);
  }
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  /* record which solver produced the factorization */
  PetscCall(PetscFree(A->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
  PetscFunctionReturn(0);
}
647 
648 static PetscErrorCode MatLUFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info) {
649   PetscFunctionBegin;
650   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
651   PetscCall(MatLUFactor_Elemental(F, 0, 0, info));
652   PetscFunctionReturn(0);
653 }
654 
/* Symbolic LU is a no-op for dense Elemental matrices. */
static PetscErrorCode MatLUFactorSymbolic_Elemental(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
660 
661 static PetscErrorCode MatCholeskyFactor_Elemental(Mat A, IS perm, const MatFactorInfo *info) {
662   Mat_Elemental                                    *a = (Mat_Elemental *)A->data;
663   El::DistMatrix<PetscElemScalar, El::MC, El::STAR> d;
664 
665   PetscFunctionBegin;
666   El::Cholesky(El::UPPER, *a->emat);
667   A->factortype = MAT_FACTOR_CHOLESKY;
668   A->assembled  = PETSC_TRUE;
669 
670   PetscCall(PetscFree(A->solvertype));
671   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &A->solvertype));
672   PetscFunctionReturn(0);
673 }
674 
675 static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F, Mat A, const MatFactorInfo *info) {
676   PetscFunctionBegin;
677   PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
678   PetscCall(MatCholeskyFactor_Elemental(F, 0, info));
679   PetscFunctionReturn(0);
680 }
681 
/* Symbolic Cholesky is a no-op for dense Elemental matrices. */
static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat F, Mat A, IS perm, const MatFactorInfo *info) {
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_elemental_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
687 
/* Report the solver package that produced this factored matrix. */
PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat A, MatSolverType *type) {
  PetscFunctionBegin;
  *type = MATSOLVERELEMENTAL;
  PetscFunctionReturn(0);
}
693 
/* Create the factor matrix F for a LU/Cholesky factorization of A.
   F is a fully set-up MATELEMENTAL matrix with the same layout as A;
   trivialsymbolic marks that the symbolic phase does no work. */
static PetscErrorCode MatGetFactor_elemental_elemental(Mat A, MatFactorType ftype, Mat *F) {
  Mat B;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetType(B, MATELEMENTAL));
  PetscCall(MatSetUp(B));
  B->factortype      = ftype;
  B->trivialsymbolic = PETSC_TRUE;
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL, &B->solvertype));

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_elemental_elemental));
  *F = B;
  PetscFunctionReturn(0);
}
712 
/* Register MATSOLVERELEMENTAL as an LU and Cholesky factorization provider
   for MATELEMENTAL matrices. */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_Elemental(void) {
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_LU, MatGetFactor_elemental_elemental));
  PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL, MATELEMENTAL, MAT_FACTOR_CHOLESKY, MatGetFactor_elemental_elemental));
  PetscFunctionReturn(0);
}
719 
/* Compute a matrix norm via Elemental.  Supports NORM_1, NORM_FROBENIUS and
   NORM_INFINITY; any other type (including NORM_2) raises PETSC_ERR_SUP. */
static PetscErrorCode MatNorm_Elemental(Mat A, NormType type, PetscReal *nrm) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;

  PetscFunctionBegin;
  switch (type) {
  case NORM_1: *nrm = El::OneNorm(*a->emat); break;
  case NORM_FROBENIUS: *nrm = El::FrobeniusNorm(*a->emat); break;
  case NORM_INFINITY: *nrm = El::InfinityNorm(*a->emat); break;
  default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
  }
  PetscFunctionReturn(0);
}
732 
733 static PetscErrorCode MatZeroEntries_Elemental(Mat A) {
734   Mat_Elemental *a = (Mat_Elemental *)A->data;
735 
736   PetscFunctionBegin;
737   El::Zero(*a->emat);
738   PetscFunctionReturn(0);
739 }
740 
/* Build index sets listing, in PETSc ordering, the global rows and/or columns
   whose entries this process stores locally under Elemental's cyclic
   distribution.  Either output may be NULL to skip it.  Each local slot is
   mapped Elemental -> (rank,offset) -> PETSc via E2RO()/RO2P().  The idx
   arrays are handed to the IS with PETSC_OWN_POINTER (the IS frees them). */
static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A, IS *rows, IS *cols) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  PetscInt       i, m, shift, stride, *idx;

  PetscFunctionBegin;
  if (rows) {
    m      = a->emat->LocalHeight();
    shift  = a->emat->ColShift();
    stride = a->emat->ColStride();
    PetscCall(PetscMalloc1(m, &idx));
    for (i = 0; i < m; i++) {
      PetscInt rank, offset;
      E2RO(A, 0, shift + i * stride, &rank, &offset);
      RO2P(A, 0, rank, offset, &idx[i]);
    }
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, rows));
  }
  if (cols) {
    m      = a->emat->LocalWidth();
    shift  = a->emat->RowShift();
    stride = a->emat->RowStride();
    PetscCall(PetscMalloc1(m, &idx));
    for (i = 0; i < m; i++) {
      PetscInt rank, offset;
      E2RO(A, 1, shift + i * stride, &rank, &offset);
      RO2P(A, 1, rank, offset, &idx[i]);
    }
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, cols));
  }
  PetscFunctionReturn(0);
}
772 
773 static PetscErrorCode MatConvert_Elemental_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B) {
774   Mat             Bmpi;
775   Mat_Elemental  *a = (Mat_Elemental *)A->data;
776   MPI_Comm        comm;
777   IS              isrows, iscols;
778   PetscInt        rrank, ridx, crank, cidx, nrows, ncols, i, j, erow, ecol, elrow, elcol;
779   const PetscInt *rows, *cols;
780   PetscElemScalar v;
781   const El::Grid &grid = a->emat->Grid();
782 
783   PetscFunctionBegin;
784   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
785 
786   if (reuse == MAT_REUSE_MATRIX) {
787     Bmpi = *B;
788   } else {
789     PetscCall(MatCreate(comm, &Bmpi));
790     PetscCall(MatSetSizes(Bmpi, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
791     PetscCall(MatSetType(Bmpi, MATDENSE));
792     PetscCall(MatSetUp(Bmpi));
793   }
794 
795   /* Get local entries of A */
796   PetscCall(MatGetOwnershipIS(A, &isrows, &iscols));
797   PetscCall(ISGetLocalSize(isrows, &nrows));
798   PetscCall(ISGetIndices(isrows, &rows));
799   PetscCall(ISGetLocalSize(iscols, &ncols));
800   PetscCall(ISGetIndices(iscols, &cols));
801 
802   if (a->roworiented) {
803     for (i = 0; i < nrows; i++) {
804       P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
805       RO2E(A, 0, rrank, ridx, &erow);
806       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
807       for (j = 0; j < ncols; j++) {
808         P2RO(A, 1, cols[j], &crank, &cidx);
809         RO2E(A, 1, crank, cidx, &ecol);
810         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
811 
812         elrow = erow / grid.MCSize(); /* Elemental local row index */
813         elcol = ecol / grid.MRSize(); /* Elemental local column index */
814         v     = a->emat->GetLocal(elrow, elcol);
815         PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
816       }
817     }
818   } else { /* column-oriented */
819     for (j = 0; j < ncols; j++) {
820       P2RO(A, 1, cols[j], &crank, &cidx);
821       RO2E(A, 1, crank, cidx, &ecol);
822       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0, comm, PETSC_ERR_PLIB, "Incorrect col translation");
823       for (i = 0; i < nrows; i++) {
824         P2RO(A, 0, rows[i], &rrank, &ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
825         RO2E(A, 0, rrank, ridx, &erow);
826         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0, comm, PETSC_ERR_PLIB, "Incorrect row translation");
827 
828         elrow = erow / grid.MCSize(); /* Elemental local row index */
829         elcol = ecol / grid.MRSize(); /* Elemental local column index */
830         v     = a->emat->GetLocal(elrow, elcol);
831         PetscCall(MatSetValues(Bmpi, 1, &rows[i], 1, &cols[j], (PetscScalar *)&v, INSERT_VALUES));
832       }
833     }
834   }
835   PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
836   PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
837   if (reuse == MAT_INPLACE_MATRIX) {
838     PetscCall(MatHeaderReplace(A, &Bmpi));
839   } else {
840     *B = Bmpi;
841   }
842   PetscCall(ISDestroy(&isrows));
843   PetscCall(ISDestroy(&iscols));
844   PetscFunctionReturn(0);
845 }
846 
847 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
848   Mat                mat_elemental;
849   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols;
850   const PetscInt    *cols;
851   const PetscScalar *vals;
852 
853   PetscFunctionBegin;
854   if (reuse == MAT_REUSE_MATRIX) {
855     mat_elemental = *newmat;
856     PetscCall(MatZeroEntries(mat_elemental));
857   } else {
858     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
859     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
860     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
861     PetscCall(MatSetUp(mat_elemental));
862   }
863   for (row = 0; row < M; row++) {
864     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
865     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
866     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
867     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
868   }
869   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
870   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
871 
872   if (reuse == MAT_INPLACE_MATRIX) {
873     PetscCall(MatHeaderReplace(A, &mat_elemental));
874   } else {
875     *newmat = mat_elemental;
876   }
877   PetscFunctionReturn(0);
878 }
879 
880 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
881   Mat                mat_elemental;
882   PetscInt           row, ncols, rstart = A->rmap->rstart, rend = A->rmap->rend, j;
883   const PetscInt    *cols;
884   const PetscScalar *vals;
885 
886   PetscFunctionBegin;
887   if (reuse == MAT_REUSE_MATRIX) {
888     mat_elemental = *newmat;
889     PetscCall(MatZeroEntries(mat_elemental));
890   } else {
891     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
892     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, A->rmap->N, A->cmap->N));
893     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
894     PetscCall(MatSetUp(mat_elemental));
895   }
896   for (row = rstart; row < rend; row++) {
897     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
898     for (j = 0; j < ncols; j++) {
899       /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
900       PetscCall(MatSetValues(mat_elemental, 1, &row, 1, &cols[j], &vals[j], ADD_VALUES));
901     }
902     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
903   }
904   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
905   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
906 
907   if (reuse == MAT_INPLACE_MATRIX) {
908     PetscCall(MatHeaderReplace(A, &mat_elemental));
909   } else {
910     *newmat = mat_elemental;
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
916   Mat                mat_elemental;
917   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j;
918   const PetscInt    *cols;
919   const PetscScalar *vals;
920 
921   PetscFunctionBegin;
922   if (reuse == MAT_REUSE_MATRIX) {
923     mat_elemental = *newmat;
924     PetscCall(MatZeroEntries(mat_elemental));
925   } else {
926     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
927     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
928     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
929     PetscCall(MatSetUp(mat_elemental));
930   }
931   PetscCall(MatGetRowUpperTriangular(A));
932   for (row = 0; row < M; row++) {
933     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
934     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
935     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
936     for (j = 0; j < ncols; j++) { /* lower triangular part */
937       PetscScalar v;
938       if (cols[j] == row) continue;
939       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
940       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
941     }
942     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
943   }
944   PetscCall(MatRestoreRowUpperTriangular(A));
945   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
946   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
947 
948   if (reuse == MAT_INPLACE_MATRIX) {
949     PetscCall(MatHeaderReplace(A, &mat_elemental));
950   } else {
951     *newmat = mat_elemental;
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
957   Mat                mat_elemental;
958   PetscInt           M = A->rmap->N, N = A->cmap->N, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
959   const PetscInt    *cols;
960   const PetscScalar *vals;
961 
962   PetscFunctionBegin;
963   if (reuse == MAT_REUSE_MATRIX) {
964     mat_elemental = *newmat;
965     PetscCall(MatZeroEntries(mat_elemental));
966   } else {
967     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
968     PetscCall(MatSetSizes(mat_elemental, PETSC_DECIDE, PETSC_DECIDE, M, N));
969     PetscCall(MatSetType(mat_elemental, MATELEMENTAL));
970     PetscCall(MatSetUp(mat_elemental));
971   }
972   PetscCall(MatGetRowUpperTriangular(A));
973   for (row = rstart; row < rend; row++) {
974     PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
975     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
976     PetscCall(MatSetValues(mat_elemental, 1, &row, ncols, cols, vals, ADD_VALUES));
977     for (j = 0; j < ncols; j++) { /* lower triangular part */
978       PetscScalar v;
979       if (cols[j] == row) continue;
980       v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
981       PetscCall(MatSetValues(mat_elemental, 1, &cols[j], 1, &row, &v, ADD_VALUES));
982     }
983     PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
984   }
985   PetscCall(MatRestoreRowUpperTriangular(A));
986   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
987   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
988 
989   if (reuse == MAT_INPLACE_MATRIX) {
990     PetscCall(MatHeaderReplace(A, &mat_elemental));
991   } else {
992     *newmat = mat_elemental;
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 static PetscErrorCode MatDestroy_Elemental(Mat A) {
998   Mat_Elemental      *a = (Mat_Elemental *)A->data;
999   Mat_Elemental_Grid *commgrid;
1000   PetscBool           flg;
1001   MPI_Comm            icomm;
1002 
1003   PetscFunctionBegin;
1004   delete a->emat;
1005   delete a->P;
1006   delete a->Q;
1007 
1008   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1009   PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, NULL));
1010   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
1011   if (--commgrid->grid_refct == 0) {
1012     delete commgrid->grid;
1013     PetscCall(PetscFree(commgrid));
1014     PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Elemental_keyval));
1015   }
1016   PetscCall(PetscCommDestroy(&icomm));
1017   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1018   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1019   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", NULL));
1020   PetscCall(PetscFree(A->data));
1021   PetscFunctionReturn(0);
1022 }
1023 
/*
   Sets up a MATELEMENTAL matrix: finalizes the layouts, verifies the local sizes match
   an equal (PetscSplitOwnership) distribution, sizes and zeroes the Elemental matrix,
   and records the block-partition parameters used by the index-translation helpers.
 */
PetscErrorCode MatSetUp_Elemental(Mat A) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;
  MPI_Comm       comm;
  PetscMPIInt    rsize, csize;
  PetscInt       n;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));

  /* Check if local row and column sizes are equally distributed.
     Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
     exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (like would be returned by
     PetscSplitOwnership(comm,&n,&N), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnership(comm, &n, &A->rmap->N));
  PetscCheck(n == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local row size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->rmap->n);

  n = PETSC_DECIDE;
  PetscCall(PetscSplitOwnership(comm, &n, &A->cmap->N));
  PetscCheck(n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Local column size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed", A->cmap->n);

  /* Resize the Elemental matrix to the global dimensions and clear it */
  a->emat->Resize(A->rmap->N, A->cmap->N);
  El::Zero(*a->emat);

  PetscCallMPI(MPI_Comm_size(A->rmap->comm, &rsize));
  PetscCallMPI(MPI_Comm_size(A->cmap->comm, &csize));
  PetscCheck(csize == rsize, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Cannot use row and column communicators of different sizes");
  a->commsize = rsize;
  /* a->mr[d] = N % size, replaced by size when N divides evenly;
     a->m[d]  = ceil(N / size), i.e. the larger local block size in the implicit block
     partition.  Presumably consumed by the P2RO/RO2P/E2RO/RO2E translation helpers
     defined elsewhere in this file — NOTE(review): confirm against those helpers. */
  a->mr[0]    = A->rmap->N % rsize;
  if (!a->mr[0]) a->mr[0] = rsize;
  a->mr[1] = A->cmap->N % csize;
  if (!a->mr[1]) a->mr[1] = csize;
  a->m[0] = A->rmap->N / rsize + (a->mr[0] != rsize);
  a->m[1] = A->cmap->N / csize + (a->mr[1] != csize);
  PetscFunctionReturn(0);
}
1062 
/*
   Assembly for MATELEMENTAL: flushes Elemental's queued updates so all entries set
   via MatSetValues() become visible.  All the work happens here; MatAssemblyEnd_Elemental()
   is a no-op.
 */
PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType type) {
  Mat_Elemental *a = (Mat_Elemental *)A->data;

  PetscFunctionBegin;
  /* ProcessQueues() applies all queued local and remote updates to the distributed matrix */
  a->emat->ProcessQueues();
  PetscFunctionReturn(0);
}
1072 
/* No-op: all assembly work is performed in MatAssemblyBegin_Elemental(). */
PetscErrorCode MatAssemblyEnd_Elemental(Mat A, MatAssemblyType type) {
  PetscFunctionBegin;
  /* Currently does nothing */
  PetscFunctionReturn(0);
}
1078 
1079 PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer) {
1080   Mat      Adense, Ae;
1081   MPI_Comm comm;
1082 
1083   PetscFunctionBegin;
1084   PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1085   PetscCall(MatCreate(comm, &Adense));
1086   PetscCall(MatSetType(Adense, MATDENSE));
1087   PetscCall(MatLoad(Adense, viewer));
1088   PetscCall(MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX, &Ae));
1089   PetscCall(MatDestroy(&Adense));
1090   PetscCall(MatHeaderReplace(newMat, &Ae));
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /* -------------------------------------------------------------------*/
/* Function table for MATELEMENTAL.  The initializer is POSITIONAL: each slot corresponds
   to a fixed index in struct _MatOps (the /\*N*\/ markers track the slot numbers), so
   entries must not be reordered, inserted, or removed without renumbering.  A 0 entry
   means the operation is unsupported (or handled by a composed function instead). */
static struct _MatOps MatOps_Values = {MatSetValues_Elemental,
                                       0,
                                       0,
                                       MatMult_Elemental,
                                       /* 4*/ MatMultAdd_Elemental,
                                       MatMultTranspose_Elemental,
                                       MatMultTransposeAdd_Elemental,
                                       MatSolve_Elemental,
                                       MatSolveAdd_Elemental,
                                       0,
                                       /*10*/ 0,
                                       MatLUFactor_Elemental,
                                       MatCholeskyFactor_Elemental,
                                       0,
                                       MatTranspose_Elemental,
                                       /*15*/ MatGetInfo_Elemental,
                                       0,
                                       MatGetDiagonal_Elemental,
                                       MatDiagonalScale_Elemental,
                                       MatNorm_Elemental,
                                       /*20*/ MatAssemblyBegin_Elemental,
                                       MatAssemblyEnd_Elemental,
                                       MatSetOption_Elemental,
                                       MatZeroEntries_Elemental,
                                       /*24*/ 0,
                                       MatLUFactorSymbolic_Elemental,
                                       MatLUFactorNumeric_Elemental,
                                       MatCholeskyFactorSymbolic_Elemental,
                                       MatCholeskyFactorNumeric_Elemental,
                                       /*29*/ MatSetUp_Elemental,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*34*/ MatDuplicate_Elemental,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*39*/ MatAXPY_Elemental,
                                       0,
                                       0,
                                       0,
                                       MatCopy_Elemental,
                                       /*44*/ 0,
                                       MatScale_Elemental,
                                       MatShift_Basic,
                                       0,
                                       0,
                                       /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*54*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*59*/ 0,
                                       MatDestroy_Elemental,
                                       MatView_Elemental,
                                       0,
                                       0,
                                       /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*69*/ 0,
                                       0,
                                       MatConvert_Elemental_Dense,
                                       0,
                                       0,
                                       /*74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_Elemental,
                                       /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*89*/ 0,
                                       0,
                                       MatMatMultNumeric_Elemental,
                                       0,
                                       0,
                                       /*94*/ 0,
                                       0,
                                       0,
                                       MatMatTransposeMultNumeric_Elemental,
                                       0,
                                       /*99*/ MatProductSetFromOptions_Elemental,
                                       0,
                                       0,
                                       MatConjugate_Elemental,
                                       0,
                                       /*104*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*109*/ MatMatSolve_Elemental,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_Elemental,
                                       /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*119*/ 0,
                                       MatHermitianTranspose_Elemental,
                                       0,
                                       0,
                                       0,
                                       /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*140*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*145*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                       /*150*/ 0};
1246 
1247 /*MC
1248    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package
1249 
1250   Use ./configure --download-elemental to install PETSc to use Elemental
1251 
1252    Options Database Keys:
1253 + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1254 . -pc_factor_mat_solver_type elemental - to use this direct solver with the option -pc_type lu
1255 - -mat_elemental_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1256 
1257   Level: beginner
1258 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine what values are stored on
   the given rank.
1263 
1264 .seealso: `MATDENSE`, `MATSCALAPACK`, `MatGetOwnershipIS()`
1265 M*/
1266 
1267 PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A) {
1268   Mat_Elemental      *a;
1269   PetscBool           flg, flg1;
1270   Mat_Elemental_Grid *commgrid;
1271   MPI_Comm            icomm;
1272   PetscInt            optv1;
1273 
1274   PetscFunctionBegin;
1275   PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
1276   A->insertmode = NOT_SET_VALUES;
1277 
1278   PetscCall(PetscNewLog(A, &a));
1279   A->data = (void *)a;
1280 
1281   /* Set up the elemental matrix */
1282   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1283 
1284   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1285   if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
1286     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_Elemental_keyval, (void *)0));
1287     PetscCall(PetscCitationsRegister(ElementalCitation, &ElementalCite));
1288   }
1289   PetscCall(PetscCommDuplicate(cxxcomm.comm, &icomm, NULL));
1290   PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_Elemental_keyval, (void **)&commgrid, (int *)&flg));
1291   if (!flg) {
1292     PetscCall(PetscNewLog(A, &commgrid));
1293 
1294     PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "Elemental Options", "Mat");
1295     /* displayed default grid sizes (CommSize,1) are set by us arbitrarily until El::Grid() is called */
1296     PetscCall(PetscOptionsInt("-mat_elemental_grid_height", "Grid Height", "None", El::mpi::Size(cxxcomm), &optv1, &flg1));
1297     if (flg1) {
1298       PetscCheck((El::mpi::Size(cxxcomm) % optv1) == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %" PetscInt_FMT, optv1, (PetscInt)El::mpi::Size(cxxcomm));
1299       commgrid->grid = new El::Grid(cxxcomm, optv1); /* use user-provided grid height */
1300     } else {
1301       commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
1302       /* printf("new commgrid->grid = %p\n",commgrid->grid);  -- memory leak revealed by valgrind? */
1303     }
1304     commgrid->grid_refct = 1;
1305     PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_Elemental_keyval, (void *)commgrid));
1306 
1307     a->pivoting = 1;
1308     PetscCall(PetscOptionsInt("-mat_elemental_pivoting", "Pivoting", "None", a->pivoting, &a->pivoting, NULL));
1309 
1310     PetscOptionsEnd();
1311   } else {
1312     commgrid->grid_refct++;
1313   }
1314   PetscCall(PetscCommDestroy(&icomm));
1315   a->grid        = commgrid->grid;
1316   a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
1317   a->roworiented = PETSC_TRUE;
1318 
1319   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_Elemental));
1320   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_elemental_mpidense_C", MatProductSetFromOptions_Elemental_MPIDense));
1321   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATELEMENTAL));
1322   PetscFunctionReturn(0);
1323 }
1324