1 #include <petsc/private/petscelemental.h>
2 
3 const char ElementalCitation[] = "@Article{Elemental2012,\n"
4 "  author  = {Jack Poulson and Bryan Marker and Jeff R. Hammond and Nichols A. Romero and Robert {v}an~{d}e~{G}eijn},\n"
5 "  title   = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations},\n"
6 "  journal = {{ACM} Transactions on Mathematical Software},\n"
7 "  volume  = {39},\n"
8 "  number  = {2},\n"
9 "  year    = {2013}\n"
10 "}\n";
11 static PetscBool ElementalCite = PETSC_FALSE;
12 
/*
  Petsc_Elemental_keyval identifies an MPI attribute attached to a communicator; the
  attribute's value is a Mat_Elemental_Grid, which caches the El::Grid shared by all
  Elemental matrices on that communicator together with a reference count.
*/
17 static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;
18 
19 static PetscErrorCode MatView_Elemental(Mat A,PetscViewer viewer)
20 {
21   Mat_Elemental  *a = (Mat_Elemental*)A->data;
22   PetscBool      iascii;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
26   if (iascii) {
27     PetscViewerFormat format;
28     PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Elemental run parameters:\n"));
      PetscCall(PetscViewerASCIIPrintf(viewer,"  allocated entries=%zu\n",(*a->emat).AllocatedMemory()));
      PetscCall(PetscViewerASCIIPrintf(viewer,"  grid height=%d, grid width=%d\n",(*a->emat).Grid().Height(),(*a->emat).Grid().Width()));
39     } else if (format == PETSC_VIEWER_DEFAULT) {
40       PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
41       El::Print( *a->emat, "Elemental matrix (cyclic ordering)");
42       PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
43       if (A->factortype == MAT_FACTOR_NONE) {
44         Mat Adense;
45         PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
46         PetscCall(MatView(Adense,viewer));
47         PetscCall(MatDestroy(&Adense));
48       }
    } else SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Unsupported viewer format");
50   } else {
51     /* convert to dense format and call MatView() */
52     Mat Adense;
53     PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
54     PetscCall(MatView(Adense,viewer));
55     PetscCall(MatDestroy(&Adense));
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 static PetscErrorCode MatGetInfo_Elemental(Mat A,MatInfoType flag,MatInfo *info)
61 {
62   Mat_Elemental  *a = (Mat_Elemental*)A->data;
63 
64   PetscFunctionBegin;
65   info->block_size = 1.0;
66 
67   if (flag == MAT_LOCAL) {
68     info->nz_allocated   = (*a->emat).AllocatedMemory(); /* locally allocated */
69     info->nz_used        = info->nz_allocated;
  } else if (flag == MAT_GLOBAL_MAX) {
    /* not implemented: a global maximum would need an MPIU_Allreduce() with MPIU_MAX,
       as in MatGetInfo_MPIAIJ(); the nz fields are left unset */
  } else if (flag == MAT_GLOBAL_SUM) {
    /* not yet summed across ranks; report the local allocation */
    info->nz_allocated   = (*a->emat).AllocatedMemory(); /* locally allocated */
    info->nz_used        = info->nz_allocated;           /* assume Elemental does accurate allocation */
  }
81 
82   info->nz_unneeded       = 0.0;
83   info->assemblies        = A->num_ass;
84   info->mallocs           = 0;
85   info->memory            = ((PetscObject)A)->mem;
86   info->fill_ratio_given  = 0; /* determined by Elemental */
87   info->fill_ratio_needed = 0;
88   info->factor_mallocs    = 0;
89   PetscFunctionReturn(0);
90 }
91 
92 PetscErrorCode MatSetOption_Elemental(Mat A,MatOption op,PetscBool flg)
93 {
94   Mat_Elemental  *a = (Mat_Elemental*)A->data;
95 
96   PetscFunctionBegin;
97   switch (op) {
98   case MAT_NEW_NONZERO_LOCATIONS:
99   case MAT_NEW_NONZERO_LOCATION_ERR:
100   case MAT_NEW_NONZERO_ALLOCATION_ERR:
101   case MAT_SYMMETRIC:
102   case MAT_SORTED_FULL:
103   case MAT_HERMITIAN:
104     break;
105   case MAT_ROW_ORIENTED:
106     a->roworiented = flg;
107     break;
108   default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
110   }
111   PetscFunctionReturn(0);
112 }
113 
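/*
   MatSetValues_Elemental - sets/adds values in two passes: the first pass applies all
   locally owned entries with Set()/Update() and counts the off-process entries, the
   second pass reserves queue space and calls QueueUpdate() for the off-process entries,
   which are communicated later by ProcessQueues() in MatAssemblyBegin_Elemental().
   P2RO()/RO2E() translate PETSc indices to Elemental indices through a (rank,offset) pair.
*/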
114 static PetscErrorCode MatSetValues_Elemental(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
115 {
116   Mat_Elemental  *a = (Mat_Elemental*)A->data;
117   PetscInt       i,j,rrank,ridx,crank,cidx,erow,ecol,numQueues=0;
118 
119   PetscFunctionBegin;
120   // TODO: Initialize matrix to all zeros?
121 
122   // Count the number of queues from this process
123   if (a->roworiented) {
124     for (i=0; i<nr; i++) {
125       if (rows[i] < 0) continue;
126       P2RO(A,0,rows[i],&rrank,&ridx);
127       RO2E(A,0,rrank,ridx,&erow);
128       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect row translation");
129       for (j=0; j<nc; j++) {
130         if (cols[j] < 0) continue;
131         P2RO(A,1,cols[j],&crank,&cidx);
132         RO2E(A,1,crank,cidx,&ecol);
133         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect col translation");
134         if (!a->emat->IsLocal(erow,ecol)) { /* off-proc entry */
136           PetscCheck(imode == ADD_VALUES,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only ADD_VALUES to off-processor entry is supported");
137           ++numQueues;
138           continue;
139         }
141         switch (imode) {
142         case INSERT_VALUES: a->emat->Set(erow,ecol,(PetscElemScalar)vals[i*nc+j]); break;
143         case ADD_VALUES: a->emat->Update(erow,ecol,(PetscElemScalar)vals[i*nc+j]); break;
144         default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
145         }
146       }
147     }
148 
    a->emat->Reserve(numQueues);
151     for (i=0; i<nr; i++) {
152       if (rows[i] < 0) continue;
153       P2RO(A,0,rows[i],&rrank,&ridx);
154       RO2E(A,0,rrank,ridx,&erow);
155       for (j=0; j<nc; j++) {
156         if (cols[j] < 0) continue;
157         P2RO(A,1,cols[j],&crank,&cidx);
158         RO2E(A,1,crank,cidx,&ecol);
159         if (!a->emat->IsLocal(erow,ecol)) { /*off-proc entry*/
          a->emat->QueueUpdate(erow,ecol,vals[i*nc+j]);
162         }
163       }
164     }
  } else { /* column-oriented */
166     for (j=0; j<nc; j++) {
167       if (cols[j] < 0) continue;
168       P2RO(A,1,cols[j],&crank,&cidx);
169       RO2E(A,1,crank,cidx,&ecol);
170       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect col translation");
171       for (i=0; i<nr; i++) {
172         if (rows[i] < 0) continue;
173         P2RO(A,0,rows[i],&rrank,&ridx);
174         RO2E(A,0,rrank,ridx,&erow);
175         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect row translation");
176         if (!a->emat->IsLocal(erow,ecol)) { /* off-proc entry */
178           PetscCheck(imode == ADD_VALUES,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only ADD_VALUES to off-processor entry is supported");
179           ++numQueues;
180           continue;
181         }
183         switch (imode) {
184         case INSERT_VALUES: a->emat->Set(erow,ecol,(PetscElemScalar)vals[i+j*nr]); break;
185         case ADD_VALUES: a->emat->Update(erow,ecol,(PetscElemScalar)vals[i+j*nr]); break;
186         default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
187         }
188       }
189     }
190 
    a->emat->Reserve(numQueues);
193     for (j=0; j<nc; j++) {
194       if (cols[j] < 0) continue;
195       P2RO(A,1,cols[j],&crank,&cidx);
196       RO2E(A,1,crank,cidx,&ecol);
197 
198       for (i=0; i<nr; i++) {
199         if (rows[i] < 0) continue;
200         P2RO(A,0,rows[i],&rrank,&ridx);
201         RO2E(A,0,rrank,ridx,&erow);
202         if (!a->emat->IsLocal(erow,ecol)) { /*off-proc entry*/
          a->emat->QueueUpdate(erow,ecol,vals[i+j*nr]);
205         }
206       }
207     }
208   }
209   PetscFunctionReturn(0);
210 }
211 
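/*
   The multiplication routines below wrap the PETSc vector arrays as Elemental [VC,STAR]
   distributed matrices with Attach()/LockedAttach(), so no data is copied; this relies on
   the equally distributed ("block") layout enforced in MatSetUp_Elemental().
*/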
212 static PetscErrorCode MatMult_Elemental(Mat A,Vec X,Vec Y)
213 {
214   Mat_Elemental         *a = (Mat_Elemental*)A->data;
215   const PetscElemScalar *x;
216   PetscElemScalar       *y;
217   PetscElemScalar       one = 1,zero = 0;
218 
219   PetscFunctionBegin;
220   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
221   PetscCall(VecGetArray(Y,(PetscScalar **)&y));
  { /* scope xe/ye so their destructors run before the vector arrays are restored */
223     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ye;
224     xe.LockedAttach(A->cmap->N,1,*a->grid,0,0,x,A->cmap->n);
225     ye.Attach(A->rmap->N,1,*a->grid,0,0,y,A->rmap->n);
226     El::Gemv(El::NORMAL,one,*a->emat,xe,zero,ye);
227   }
228   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
229   PetscCall(VecRestoreArray(Y,(PetscScalar **)&y));
230   PetscFunctionReturn(0);
231 }
232 
233 static PetscErrorCode MatMultTranspose_Elemental(Mat A,Vec X,Vec Y)
234 {
235   Mat_Elemental         *a = (Mat_Elemental*)A->data;
236   const PetscElemScalar *x;
237   PetscElemScalar       *y;
238   PetscElemScalar       one = 1,zero = 0;
239 
240   PetscFunctionBegin;
241   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
242   PetscCall(VecGetArray(Y,(PetscScalar **)&y));
  { /* scope xe/ye so their destructors run before the vector arrays are restored */
244     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ye;
245     xe.LockedAttach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n);
246     ye.Attach(A->cmap->N,1,*a->grid,0,0,y,A->cmap->n);
247     El::Gemv(El::TRANSPOSE,one,*a->emat,xe,zero,ye);
248   }
249   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
250   PetscCall(VecRestoreArray(Y,(PetscScalar **)&y));
251   PetscFunctionReturn(0);
252 }
253 
254 static PetscErrorCode MatMultAdd_Elemental(Mat A,Vec X,Vec Y,Vec Z)
255 {
256   Mat_Elemental         *a = (Mat_Elemental*)A->data;
257   const PetscElemScalar *x;
258   PetscElemScalar       *z;
259   PetscElemScalar       one = 1;
260 
261   PetscFunctionBegin;
262   if (Y != Z) PetscCall(VecCopy(Y,Z));
263   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
264   PetscCall(VecGetArray(Z,(PetscScalar **)&z));
  { /* scope xe/ze so their destructors run before the vector arrays are restored */
266     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ze;
267     xe.LockedAttach(A->cmap->N,1,*a->grid,0,0,x,A->cmap->n);
268     ze.Attach(A->rmap->N,1,*a->grid,0,0,z,A->rmap->n);
269     El::Gemv(El::NORMAL,one,*a->emat,xe,one,ze);
270   }
271   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
272   PetscCall(VecRestoreArray(Z,(PetscScalar **)&z));
273   PetscFunctionReturn(0);
274 }
275 
276 static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A,Vec X,Vec Y,Vec Z)
277 {
278   Mat_Elemental         *a = (Mat_Elemental*)A->data;
279   const PetscElemScalar *x;
280   PetscElemScalar       *z;
281   PetscElemScalar       one = 1;
282 
283   PetscFunctionBegin;
284   if (Y != Z) PetscCall(VecCopy(Y,Z));
285   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
286   PetscCall(VecGetArray(Z,(PetscScalar **)&z));
  { /* scope xe/ze so their destructors run before the vector arrays are restored */
288     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ze;
289     xe.LockedAttach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n);
290     ze.Attach(A->cmap->N,1,*a->grid,0,0,z,A->cmap->n);
291     El::Gemv(El::TRANSPOSE,one,*a->emat,xe,one,ze);
292   }
293   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
294   PetscCall(VecRestoreArray(Z,(PetscScalar **)&z));
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatMatMultNumeric_Elemental(Mat A,Mat B,Mat C)
299 {
300   Mat_Elemental    *a = (Mat_Elemental*)A->data;
301   Mat_Elemental    *b = (Mat_Elemental*)B->data;
302   Mat_Elemental    *c = (Mat_Elemental*)C->data;
303   PetscElemScalar  one = 1,zero = 0;
304 
305   PetscFunctionBegin;
  El::Gemm(El::NORMAL,El::NORMAL,one,*a->emat,*b->emat,zero,*c->emat);
309   C->assembled = PETSC_TRUE;
310   PetscFunctionReturn(0);
311 }
312 
313 PetscErrorCode MatMatMultSymbolic_Elemental(Mat A,Mat B,PetscReal fill,Mat Ce)
314 {
315   PetscFunctionBegin;
316   PetscCall(MatSetSizes(Ce,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
317   PetscCall(MatSetType(Ce,MATELEMENTAL));
318   PetscCall(MatSetUp(Ce));
319   Ce->ops->matmultnumeric = MatMatMultNumeric_Elemental;
320   PetscFunctionReturn(0);
321 }
322 
323 static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A,Mat B,Mat C)
324 {
325   Mat_Elemental      *a = (Mat_Elemental*)A->data;
326   Mat_Elemental      *b = (Mat_Elemental*)B->data;
327   Mat_Elemental      *c = (Mat_Elemental*)C->data;
328   PetscElemScalar    one = 1,zero = 0;
329 
330   PetscFunctionBegin;
  El::Gemm(El::NORMAL,El::TRANSPOSE,one,*a->emat,*b->emat,zero,*c->emat);
334   C->assembled = PETSC_TRUE;
335   PetscFunctionReturn(0);
336 }
337 
338 static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A,Mat B,PetscReal fill,Mat C)
339 {
340   PetscFunctionBegin;
341   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
342   PetscCall(MatSetType(C,MATELEMENTAL));
343   PetscCall(MatSetUp(C));
344   PetscFunctionReturn(0);
345 }
346 
347 /* --------------------------------------- */
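/*
   MatProduct support: the symbolic routines only set the sizes and type of the result
   (Elemental allocates its own storage), while the numeric work is done by El::Gemm()
   in the *MultNumeric routines above.
*/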
348 static PetscErrorCode MatProductSetFromOptions_Elemental_AB(Mat C)
349 {
350   PetscFunctionBegin;
351   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental;
352   C->ops->productsymbolic = MatProductSymbolic_AB;
353   PetscFunctionReturn(0);
354 }
355 
356 static PetscErrorCode MatProductSetFromOptions_Elemental_ABt(Mat C)
357 {
358   PetscFunctionBegin;
359   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_Elemental;
360   C->ops->productsymbolic          = MatProductSymbolic_ABt;
361   PetscFunctionReturn(0);
362 }
363 
364 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Elemental(Mat C)
365 {
366   Mat_Product    *product = C->product;
367 
368   PetscFunctionBegin;
369   switch (product->type) {
370   case MATPRODUCT_AB:
371     PetscCall(MatProductSetFromOptions_Elemental_AB(C));
372     break;
373   case MATPRODUCT_ABt:
374     PetscCall(MatProductSetFromOptions_Elemental_ABt(C));
375     break;
376   default:
377     break;
378   }
379   PetscFunctionReturn(0);
380 }
381 
382 PetscErrorCode MatMatMultNumeric_Elemental_MPIDense(Mat A,Mat B,Mat C)
383 {
384   Mat            Be,Ce;
385 
386   PetscFunctionBegin;
387   PetscCall(MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be));
388   PetscCall(MatMatMult(A,Be,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ce));
389   PetscCall(MatConvert(Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
390   PetscCall(MatDestroy(&Be));
391   PetscCall(MatDestroy(&Ce));
392   PetscFunctionReturn(0);
393 }
394 
395 PetscErrorCode MatMatMultSymbolic_Elemental_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
396 {
397   PetscFunctionBegin;
398   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
399   PetscCall(MatSetType(C,MATMPIDENSE));
400   PetscCall(MatSetUp(C));
401   C->ops->matmultnumeric = MatMatMultNumeric_Elemental_MPIDense;
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense_AB(Mat C)
406 {
407   PetscFunctionBegin;
408   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental_MPIDense;
409   C->ops->productsymbolic = MatProductSymbolic_AB;
410   PetscFunctionReturn(0);
411 }
412 
413 PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense(Mat C)
414 {
415   Mat_Product    *product = C->product;
416 
417   PetscFunctionBegin;
418   if (product->type == MATPRODUCT_AB) {
419     PetscCall(MatProductSetFromOptions_Elemental_MPIDense_AB(C));
420   }
421   PetscFunctionReturn(0);
422 }
423 /* --------------------------------------- */
424 
425 static PetscErrorCode MatGetDiagonal_Elemental(Mat A,Vec D)
426 {
427   PetscInt        i,nrows,ncols,nD,rrank,ridx,crank,cidx;
428   Mat_Elemental   *a = (Mat_Elemental*)A->data;
429   PetscElemScalar v;
430   MPI_Comm        comm;
431 
432   PetscFunctionBegin;
433   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
434   PetscCall(MatGetSize(A,&nrows,&ncols));
435   nD = nrows>ncols ? ncols : nrows;
436   for (i=0; i<nD; i++) {
437     PetscInt erow,ecol;
438     P2RO(A,0,i,&rrank,&ridx);
439     RO2E(A,0,rrank,ridx,&erow);
440     PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,comm,PETSC_ERR_PLIB,"Incorrect row translation");
441     P2RO(A,1,i,&crank,&cidx);
442     RO2E(A,1,crank,cidx,&ecol);
443     PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,comm,PETSC_ERR_PLIB,"Incorrect col translation");
444     v = a->emat->Get(erow,ecol);
445     PetscCall(VecSetValues(D,1,&i,(PetscScalar*)&v,INSERT_VALUES));
446   }
447   PetscCall(VecAssemblyBegin(D));
448   PetscCall(VecAssemblyEnd(D));
449   PetscFunctionReturn(0);
450 }
451 
452 static PetscErrorCode MatDiagonalScale_Elemental(Mat X,Vec L,Vec R)
453 {
454   Mat_Elemental         *x = (Mat_Elemental*)X->data;
455   const PetscElemScalar *d;
456 
457   PetscFunctionBegin;
458   if (R) {
459     PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
460     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> de;
461     de.LockedAttach(X->cmap->N,1,*x->grid,0,0,d,X->cmap->n);
462     El::DiagonalScale(El::RIGHT,El::NORMAL,de,*x->emat);
463     PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d));
464   }
465   if (L) {
466     PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d));
467     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> de;
468     de.LockedAttach(X->rmap->N,1,*x->grid,0,0,d,X->rmap->n);
469     El::DiagonalScale(El::LEFT,El::NORMAL,de,*x->emat);
470     PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 static PetscErrorCode MatMissingDiagonal_Elemental(Mat A,PetscBool *missing,PetscInt *d)
476 {
477   PetscFunctionBegin;
478   *missing = PETSC_FALSE;
479   PetscFunctionReturn(0);
480 }
481 
482 static PetscErrorCode MatScale_Elemental(Mat X,PetscScalar a)
483 {
484   Mat_Elemental  *x = (Mat_Elemental*)X->data;
485 
486   PetscFunctionBegin;
487   El::Scale((PetscElemScalar)a,*x->emat);
488   PetscFunctionReturn(0);
489 }
490 
491 /*
492   MatAXPY - Computes Y = a*X + Y.
493 */
494 static PetscErrorCode MatAXPY_Elemental(Mat Y,PetscScalar a,Mat X,MatStructure str)
495 {
496   Mat_Elemental  *x = (Mat_Elemental*)X->data;
497   Mat_Elemental  *y = (Mat_Elemental*)Y->data;
498 
499   PetscFunctionBegin;
500   El::Axpy((PetscElemScalar)a,*x->emat,*y->emat);
501   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
502   PetscFunctionReturn(0);
503 }
504 
505 static PetscErrorCode MatCopy_Elemental(Mat A,Mat B,MatStructure str)
506 {
507   Mat_Elemental *a=(Mat_Elemental*)A->data;
508   Mat_Elemental *b=(Mat_Elemental*)B->data;
509 
510   PetscFunctionBegin;
511   El::Copy(*a->emat,*b->emat);
512   PetscCall(PetscObjectStateIncrease((PetscObject)B));
513   PetscFunctionReturn(0);
514 }
515 
516 static PetscErrorCode MatDuplicate_Elemental(Mat A,MatDuplicateOption op,Mat *B)
517 {
518   Mat            Be;
519   MPI_Comm       comm;
520   Mat_Elemental  *a=(Mat_Elemental*)A->data;
521 
522   PetscFunctionBegin;
523   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
524   PetscCall(MatCreate(comm,&Be));
525   PetscCall(MatSetSizes(Be,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
526   PetscCall(MatSetType(Be,MATELEMENTAL));
527   PetscCall(MatSetUp(Be));
528   *B = Be;
529   if (op == MAT_COPY_VALUES) {
530     Mat_Elemental *b=(Mat_Elemental*)Be->data;
531     El::Copy(*a->emat,*b->emat);
532   }
533   Be->assembled = PETSC_TRUE;
534   PetscFunctionReturn(0);
535 }
536 
537 static PetscErrorCode MatTranspose_Elemental(Mat A,MatReuse reuse,Mat *B)
538 {
539   Mat            Be = *B;
540   MPI_Comm       comm;
541   Mat_Elemental  *a = (Mat_Elemental*)A->data, *b;
542 
543   PetscFunctionBegin;
544   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
545   /* Only out-of-place supported */
546   PetscCheck(reuse != MAT_INPLACE_MATRIX,comm,PETSC_ERR_SUP,"Only out-of-place supported");
547   if (reuse == MAT_INITIAL_MATRIX) {
548     PetscCall(MatCreate(comm,&Be));
549     PetscCall(MatSetSizes(Be,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
550     PetscCall(MatSetType(Be,MATELEMENTAL));
551     PetscCall(MatSetUp(Be));
552     *B = Be;
553   }
554   b = (Mat_Elemental*)Be->data;
555   El::Transpose(*a->emat,*b->emat);
556   Be->assembled = PETSC_TRUE;
557   PetscFunctionReturn(0);
558 }
559 
560 static PetscErrorCode MatConjugate_Elemental(Mat A)
561 {
562   Mat_Elemental  *a = (Mat_Elemental*)A->data;
563 
564   PetscFunctionBegin;
565   El::Conjugate(*a->emat);
566   PetscFunctionReturn(0);
567 }
568 
569 static PetscErrorCode MatHermitianTranspose_Elemental(Mat A,MatReuse reuse,Mat *B)
570 {
571   Mat            Be = *B;
572   MPI_Comm       comm;
573   Mat_Elemental  *a = (Mat_Elemental*)A->data, *b;
574 
575   PetscFunctionBegin;
576   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  /* Only out-of-place supported */
  PetscCheck(reuse != MAT_INPLACE_MATRIX,comm,PETSC_ERR_SUP,"Only out-of-place supported");
  if (reuse == MAT_INITIAL_MATRIX) {
579     PetscCall(MatCreate(comm,&Be));
580     PetscCall(MatSetSizes(Be,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
581     PetscCall(MatSetType(Be,MATELEMENTAL));
582     PetscCall(MatSetUp(Be));
583     *B = Be;
584   }
585   b = (Mat_Elemental*)Be->data;
586   El::Adjoint(*a->emat,*b->emat);
587   Be->assembled = PETSC_TRUE;
588   PetscFunctionReturn(0);
589 }
590 
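/*
   MatSolve_Elemental - solves with a previously computed LU or Cholesky factor. The
   right-hand side is copied into X, attached as a [VC,STAR] vector, and redistributed
   into the [MC,MR] layout expected by the Elemental SolveAfter() routines; the solution
   is copied back afterwards. a->pivoting selects LU without pivoting (0), with partial
   pivoting (1), or with full pivoting (2).
*/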
591 static PetscErrorCode MatSolve_Elemental(Mat A,Vec B,Vec X)
592 {
593   Mat_Elemental     *a = (Mat_Elemental*)A->data;
594   PetscElemScalar   *x;
595   PetscInt          pivoting = a->pivoting;
596 
597   PetscFunctionBegin;
598   PetscCall(VecCopy(B,X));
599   PetscCall(VecGetArray(X,(PetscScalar **)&x));
600 
601   El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe;
602   xe.Attach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n);
603   El::DistMatrix<PetscElemScalar,El::MC,El::MR> xer(xe);
604   switch (A->factortype) {
605   case MAT_FACTOR_LU:
606     if (pivoting == 0) {
607       El::lu::SolveAfter(El::NORMAL,*a->emat,xer);
608     } else if (pivoting == 1) {
609       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,xer);
610     } else { /* pivoting == 2 */
611       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*a->Q,xer);
612     }
613     break;
614   case MAT_FACTOR_CHOLESKY:
615     El::cholesky::SolveAfter(El::UPPER,El::NORMAL,*a->emat,xer);
616     break;
617   default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix is unfactored or has an unsupported MatFactorType");
619     break;
620   }
621   El::Copy(xer,xe);
622 
623   PetscCall(VecRestoreArray(X,(PetscScalar **)&x));
624   PetscFunctionReturn(0);
625 }
626 
627 static PetscErrorCode MatSolveAdd_Elemental(Mat A,Vec B,Vec Y,Vec X)
628 {
629   PetscFunctionBegin;
630   PetscCall(MatSolve_Elemental(A,B,X));
631   PetscCall(VecAXPY(X,1,Y));
632   PetscFunctionReturn(0);
633 }
634 
635 static PetscErrorCode MatMatSolve_Elemental(Mat A,Mat B,Mat X)
636 {
637   Mat_Elemental  *a = (Mat_Elemental*)A->data;
638   Mat_Elemental  *x;
639   Mat            C;
640   PetscInt       pivoting = a->pivoting;
641   PetscBool      flg;
642   MatType        type;
643 
644   PetscFunctionBegin;
645   PetscCall(MatGetType(X,&type));
646   PetscCall(PetscStrcmp(type,MATELEMENTAL,&flg));
647   if (!flg) {
648     PetscCall(MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&C));
649     x = (Mat_Elemental*)C->data;
650   } else {
651     x = (Mat_Elemental*)X->data;
652     El::Copy(*((Mat_Elemental*)B->data)->emat,*x->emat);
653   }
654   switch (A->factortype) {
655   case MAT_FACTOR_LU:
656     if (pivoting == 0) {
657       El::lu::SolveAfter(El::NORMAL,*a->emat,*x->emat);
658     } else if (pivoting == 1) {
659       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*x->emat);
660     } else {
661       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*a->Q,*x->emat);
662     }
663     break;
664   case MAT_FACTOR_CHOLESKY:
665     El::cholesky::SolveAfter(El::UPPER,El::NORMAL,*a->emat,*x->emat);
666     break;
667   default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix is unfactored or has an unsupported MatFactorType");
669     break;
670   }
671   if (!flg) {
672     PetscCall(MatConvert(C,type,MAT_REUSE_MATRIX,&X));
673     PetscCall(MatDestroy(&C));
674   }
675   PetscFunctionReturn(0);
676 }
677 
678 static PetscErrorCode MatLUFactor_Elemental(Mat A,IS row,IS col,const MatFactorInfo *info)
679 {
680   Mat_Elemental  *a = (Mat_Elemental*)A->data;
681   PetscInt       pivoting = a->pivoting;
682 
683   PetscFunctionBegin;
684   if (pivoting == 0) {
685     El::LU(*a->emat);
686   } else if (pivoting == 1) {
687     El::LU(*a->emat,*a->P);
688   } else {
689     El::LU(*a->emat,*a->P,*a->Q);
690   }
691   A->factortype = MAT_FACTOR_LU;
692   A->assembled  = PETSC_TRUE;
693 
694   PetscCall(PetscFree(A->solvertype));
695   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL,&A->solvertype));
696   PetscFunctionReturn(0);
697 }
698 
699 static PetscErrorCode  MatLUFactorNumeric_Elemental(Mat F,Mat A,const MatFactorInfo *info)
700 {
701   PetscFunctionBegin;
702   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatLUFactor_Elemental(F,NULL,NULL,info));
704   PetscFunctionReturn(0);
705 }
706 
707 static PetscErrorCode  MatLUFactorSymbolic_Elemental(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
708 {
709   PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_elemental_elemental(); nothing to do here */
711   PetscFunctionReturn(0);
712 }
713 
714 static PetscErrorCode MatCholeskyFactor_Elemental(Mat A,IS perm,const MatFactorInfo *info)
715 {
716   Mat_Elemental  *a = (Mat_Elemental*)A->data;
718 
719   PetscFunctionBegin;
720   El::Cholesky(El::UPPER,*a->emat);
721   A->factortype = MAT_FACTOR_CHOLESKY;
722   A->assembled  = PETSC_TRUE;
723 
724   PetscCall(PetscFree(A->solvertype));
725   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL,&A->solvertype));
726   PetscFunctionReturn(0);
727 }
728 
729 static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F,Mat A,const MatFactorInfo *info)
730 {
731   PetscFunctionBegin;
732   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
  PetscCall(MatCholeskyFactor_Elemental(F,NULL,info));
734   PetscFunctionReturn(0);
735 }
736 
737 static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat F,Mat A,IS perm,const MatFactorInfo *info)
738 {
739   PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_elemental_elemental(); nothing to do here */
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat A,MatSolverType *type)
745 {
746   PetscFunctionBegin;
747   *type = MATSOLVERELEMENTAL;
748   PetscFunctionReturn(0);
749 }
750 
751 static PetscErrorCode MatGetFactor_elemental_elemental(Mat A,MatFactorType ftype,Mat *F)
752 {
753   Mat            B;
754 
755   PetscFunctionBegin;
756   /* Create the factorization matrix */
757   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
758   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
759   PetscCall(MatSetType(B,MATELEMENTAL));
760   PetscCall(MatSetUp(B));
761   B->factortype = ftype;
762   B->trivialsymbolic = PETSC_TRUE;
763   PetscCall(PetscFree(B->solvertype));
764   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL,&B->solvertype));
765 
766   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_elemental_elemental));
767   *F            = B;
768   PetscFunctionReturn(0);
769 }
770 
771 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_Elemental(void)
772 {
773   PetscFunctionBegin;
774   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL,MATELEMENTAL,        MAT_FACTOR_LU,MatGetFactor_elemental_elemental));
775   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL,MATELEMENTAL,        MAT_FACTOR_CHOLESKY,MatGetFactor_elemental_elemental));
776   PetscFunctionReturn(0);
777 }
778 
779 static PetscErrorCode MatNorm_Elemental(Mat A,NormType type,PetscReal *nrm)
780 {
781   Mat_Elemental *a=(Mat_Elemental*)A->data;
782 
783   PetscFunctionBegin;
784   switch (type) {
785   case NORM_1:
786     *nrm = El::OneNorm(*a->emat);
787     break;
788   case NORM_FROBENIUS:
789     *nrm = El::FrobeniusNorm(*a->emat);
790     break;
791   case NORM_INFINITY:
792     *nrm = El::InfinityNorm(*a->emat);
793     break;
794   default:
795     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
796   }
797   PetscFunctionReturn(0);
798 }
799 
800 static PetscErrorCode MatZeroEntries_Elemental(Mat A)
801 {
802   Mat_Elemental *a=(Mat_Elemental*)A->data;
803 
804   PetscFunctionBegin;
805   El::Zero(*a->emat);
806   PetscFunctionReturn(0);
807 }
808 
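/*
   MatGetOwnershipIS_Elemental - builds index sets of the global PETSc rows/columns whose
   entries are stored on this process: it enumerates the local entries of the cyclic
   distribution (shift + i*stride) and maps each back to a PETSc index with E2RO()/RO2P().
*/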
809 static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A,IS *rows,IS *cols)
810 {
811   Mat_Elemental  *a = (Mat_Elemental*)A->data;
812   PetscInt       i,m,shift,stride,*idx;
813 
814   PetscFunctionBegin;
815   if (rows) {
816     m = a->emat->LocalHeight();
817     shift = a->emat->ColShift();
818     stride = a->emat->ColStride();
819     PetscCall(PetscMalloc1(m,&idx));
820     for (i=0; i<m; i++) {
821       PetscInt rank,offset;
822       E2RO(A,0,shift+i*stride,&rank,&offset);
823       RO2P(A,0,rank,offset,&idx[i]);
824     }
825     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,rows));
826   }
827   if (cols) {
828     m = a->emat->LocalWidth();
829     shift = a->emat->RowShift();
830     stride = a->emat->RowStride();
831     PetscCall(PetscMalloc1(m,&idx));
832     for (i=0; i<m; i++) {
833       PetscInt rank,offset;
834       E2RO(A,1,shift+i*stride,&rank,&offset);
835       RO2P(A,1,rank,offset,&idx[i]);
836     }
837     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,cols));
838   }
839   PetscFunctionReturn(0);
840 }
841 
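/*
   MatConvert_Elemental_Dense - copies the locally stored entries into a MATDENSE matrix.
   The local-index formulas elrow = erow/MCSize() and elcol = ecol/MRSize() assume the
   default [MC,MR] element-cyclic distribution of a->emat.
*/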
842 static PetscErrorCode MatConvert_Elemental_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
843 {
844   Mat                Bmpi;
845   Mat_Elemental      *a = (Mat_Elemental*)A->data;
846   MPI_Comm           comm;
847   IS                 isrows,iscols;
848   PetscInt           rrank,ridx,crank,cidx,nrows,ncols,i,j,erow,ecol,elrow,elcol;
849   const PetscInt     *rows,*cols;
850   PetscElemScalar    v;
851   const El::Grid     &grid = a->emat->Grid();
852 
853   PetscFunctionBegin;
854   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
855 
856   if (reuse == MAT_REUSE_MATRIX) {
857     Bmpi = *B;
858   } else {
859     PetscCall(MatCreate(comm,&Bmpi));
860     PetscCall(MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
861     PetscCall(MatSetType(Bmpi,MATDENSE));
862     PetscCall(MatSetUp(Bmpi));
863   }
864 
865   /* Get local entries of A */
866   PetscCall(MatGetOwnershipIS(A,&isrows,&iscols));
867   PetscCall(ISGetLocalSize(isrows,&nrows));
868   PetscCall(ISGetIndices(isrows,&rows));
869   PetscCall(ISGetLocalSize(iscols,&ncols));
870   PetscCall(ISGetIndices(iscols,&cols));
871 
872   if (a->roworiented) {
873     for (i=0; i<nrows; i++) {
874       P2RO(A,0,rows[i],&rrank,&ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
875       RO2E(A,0,rrank,ridx,&erow);
876       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,comm,PETSC_ERR_PLIB,"Incorrect row translation");
877       for (j=0; j<ncols; j++) {
878         P2RO(A,1,cols[j],&crank,&cidx);
879         RO2E(A,1,crank,cidx,&ecol);
880         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,comm,PETSC_ERR_PLIB,"Incorrect col translation");
881 
882         elrow = erow / grid.MCSize(); /* Elemental local row index */
883         elcol = ecol / grid.MRSize(); /* Elemental local column index */
884         v = a->emat->GetLocal(elrow,elcol);
885         PetscCall(MatSetValues(Bmpi,1,&rows[i],1,&cols[j],(PetscScalar *)&v,INSERT_VALUES));
886       }
887     }
888   } else { /* column-oriented */
889     for (j=0; j<ncols; j++) {
890       P2RO(A,1,cols[j],&crank,&cidx);
891       RO2E(A,1,crank,cidx,&ecol);
892       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,comm,PETSC_ERR_PLIB,"Incorrect col translation");
893       for (i=0; i<nrows; i++) {
894         P2RO(A,0,rows[i],&rrank,&ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
895         RO2E(A,0,rrank,ridx,&erow);
896         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,comm,PETSC_ERR_PLIB,"Incorrect row translation");
897 
898         elrow = erow / grid.MCSize(); /* Elemental local row index */
899         elcol = ecol / grid.MRSize(); /* Elemental local column index */
900         v = a->emat->GetLocal(elrow,elcol);
901         PetscCall(MatSetValues(Bmpi,1,&rows[i],1,&cols[j],(PetscScalar *)&v,INSERT_VALUES));
902       }
903     }
904   }
905   PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
906   PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
907   if (reuse == MAT_INPLACE_MATRIX) {
908     PetscCall(MatHeaderReplace(A,&Bmpi));
909   } else {
910     *B = Bmpi;
911   }
912   PetscCall(ISDestroy(&isrows));
913   PetscCall(ISDestroy(&iscols));
914   PetscFunctionReturn(0);
915 }
916 
917 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
918 {
919   Mat               mat_elemental;
920   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols;
921   const PetscInt    *cols;
922   const PetscScalar *vals;
923 
924   PetscFunctionBegin;
925   if (reuse == MAT_REUSE_MATRIX) {
926     mat_elemental = *newmat;
927     PetscCall(MatZeroEntries(mat_elemental));
928   } else {
929     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
930     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N));
931     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
932     PetscCall(MatSetUp(mat_elemental));
933   }
934   for (row=0; row<M; row++) {
935     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    /* the PETSc-Elemental interface queues off-process entries as additive updates, so only ADD_VALUES is allowed */
937     PetscCall(MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES));
938     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
939   }
940   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
941   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
942 
943   if (reuse == MAT_INPLACE_MATRIX) {
944     PetscCall(MatHeaderReplace(A,&mat_elemental));
945   } else {
946     *newmat = mat_elemental;
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
952 {
953   Mat               mat_elemental;
954   PetscInt          row,ncols,rstart=A->rmap->rstart,rend=A->rmap->rend,j;
955   const PetscInt    *cols;
956   const PetscScalar *vals;
957 
958   PetscFunctionBegin;
959   if (reuse == MAT_REUSE_MATRIX) {
960     mat_elemental = *newmat;
961     PetscCall(MatZeroEntries(mat_elemental));
962   } else {
963     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
964     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
965     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
966     PetscCall(MatSetUp(mat_elemental));
967   }
968   for (row=rstart; row<rend; row++) {
969     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
970     for (j=0; j<ncols; j++) {
      /* the PETSc-Elemental interface queues off-process entries as additive updates, so only ADD_VALUES is allowed */
972       PetscCall(MatSetValues(mat_elemental,1,&row,1,&cols[j],&vals[j],ADD_VALUES));
973     }
974     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
975   }
976   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
977   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
978 
979   if (reuse == MAT_INPLACE_MATRIX) {
980     PetscCall(MatHeaderReplace(A,&mat_elemental));
981   } else {
982     *newmat = mat_elemental;
983   }
984   PetscFunctionReturn(0);
985 }
986 
987 PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
988 {
989   Mat               mat_elemental;
990   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols,j;
991   const PetscInt    *cols;
992   const PetscScalar *vals;
993 
994   PetscFunctionBegin;
995   if (reuse == MAT_REUSE_MATRIX) {
996     mat_elemental = *newmat;
997     PetscCall(MatZeroEntries(mat_elemental));
998   } else {
999     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1000     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N));
1001     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1002     PetscCall(MatSetUp(mat_elemental));
1003   }
1004   PetscCall(MatGetRowUpperTriangular(A));
1005   for (row=0; row<M; row++) {
1006     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    /* the PETSc-Elemental interface queues off-process entries as additive updates, so only ADD_VALUES is allowed */
1008     PetscCall(MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES));
1009     for (j=0; j<ncols; j++) { /* lower triangular part */
1010       PetscScalar v;
1011       if (cols[j] == row) continue;
1012       v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1013       PetscCall(MatSetValues(mat_elemental,1,&cols[j],1,&row,&v,ADD_VALUES));
1014     }
1015     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1016   }
1017   PetscCall(MatRestoreRowUpperTriangular(A));
1018   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1019   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1020 
1021   if (reuse == MAT_INPLACE_MATRIX) {
1022     PetscCall(MatHeaderReplace(A,&mat_elemental));
1023   } else {
1024     *newmat = mat_elemental;
1025   }
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1030 {
1031   Mat               mat_elemental;
1032   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1033   const PetscInt    *cols;
1034   const PetscScalar *vals;
1035 
1036   PetscFunctionBegin;
1037   if (reuse == MAT_REUSE_MATRIX) {
1038     mat_elemental = *newmat;
1039     PetscCall(MatZeroEntries(mat_elemental));
1040   } else {
1041     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1042     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N));
1043     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1044     PetscCall(MatSetUp(mat_elemental));
1045   }
1046   PetscCall(MatGetRowUpperTriangular(A));
1047   for (row=rstart; row<rend; row++) {
1048     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
    /* the PETSc-Elemental interface queues off-process entries as additive updates, so only ADD_VALUES is allowed */
1050     PetscCall(MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES));
1051     for (j=0; j<ncols; j++) { /* lower triangular part */
1052       PetscScalar v;
1053       if (cols[j] == row) continue;
1054       v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1055       PetscCall(MatSetValues(mat_elemental,1,&cols[j],1,&row,&v,ADD_VALUES));
1056     }
1057     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1058   }
1059   PetscCall(MatRestoreRowUpperTriangular(A));
1060   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1061   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1062 
1063   if (reuse == MAT_INPLACE_MATRIX) {
1064     PetscCall(MatHeaderReplace(A,&mat_elemental));
1065   } else {
1066     *newmat = mat_elemental;
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
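/*
   MatDestroy_Elemental - frees the Elemental objects and decrements the reference count
   of the grid cached on the communicator; the grid and the keyval are freed when the
   last Elemental matrix on the communicator is destroyed.
*/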
1071 static PetscErrorCode MatDestroy_Elemental(Mat A)
1072 {
1073   Mat_Elemental      *a = (Mat_Elemental*)A->data;
1074   Mat_Elemental_Grid *commgrid;
1075   PetscBool          flg;
1076   MPI_Comm           icomm;
1077 
1078   PetscFunctionBegin;
1079   delete a->emat;
1080   delete a->P;
1081   delete a->Q;
1082 
1083   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1084   PetscCall(PetscCommDuplicate(cxxcomm.comm,&icomm,NULL));
  PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_Elemental_keyval,(void**)&commgrid,(int*)&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Elemental grid attribute not found on communicator");
  if (--commgrid->grid_refct == 0) {
1087     delete commgrid->grid;
1088     PetscCall(PetscFree(commgrid));
1089     PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Elemental_keyval));
1090   }
1091   PetscCall(PetscCommDestroy(&icomm));
1092   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
1093   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1094   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_elemental_mpidense_C",NULL));
1095   PetscCall(PetscFree(A->data));
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 PetscErrorCode MatSetUp_Elemental(Mat A)
1100 {
1101   Mat_Elemental  *a = (Mat_Elemental*)A->data;
1102   MPI_Comm       comm;
1103   PetscMPIInt    rsize,csize;
1104   PetscInt       n;
1105 
1106   PetscFunctionBegin;
1107   PetscCall(PetscLayoutSetUp(A->rmap));
1108   PetscCall(PetscLayoutSetUp(A->cmap));
1109 
1110   /* Check if local row and column sizes are equally distributed.
1111      Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
1112      exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (like would be returned by
1113      PetscSplitOwnership(comm,&n,&N), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
1114   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1115   n = PETSC_DECIDE;
1116   PetscCall(PetscSplitOwnership(comm,&n,&A->rmap->N));
1117   PetscCheck(n == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local row size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed",A->rmap->n);
1118 
1119   n = PETSC_DECIDE;
1120   PetscCall(PetscSplitOwnership(comm,&n,&A->cmap->N));
1121   PetscCheck(n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local column size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed",A->cmap->n);
1122 
1123   a->emat->Resize(A->rmap->N,A->cmap->N);
1124   El::Zero(*a->emat);
1125 
1126   PetscCallMPI(MPI_Comm_size(A->rmap->comm,&rsize));
1127   PetscCallMPI(MPI_Comm_size(A->cmap->comm,&csize));
1128   PetscCheck(csize == rsize,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Cannot use row and column communicators of different sizes");
1129   a->commsize = rsize;
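  /* block-ordering parameters consumed by the P2RO()/RO2P()/E2RO() index translations:
     judging from the formulas below, a->m[] is the ceiling local block size and a->mr[]
     the number of ranks that hold the larger block (set to the communicator size when
     the global size divides evenly) */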
1130   a->mr[0] = A->rmap->N % rsize; if (!a->mr[0]) a->mr[0] = rsize;
1131   a->mr[1] = A->cmap->N % csize; if (!a->mr[1]) a->mr[1] = csize;
1132   a->m[0]  = A->rmap->N / rsize + (a->mr[0] != rsize);
1133   a->m[1]  = A->cmap->N / csize + (a->mr[1] != csize);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType type)
1138 {
1139   Mat_Elemental  *a = (Mat_Elemental*)A->data;
1140 
1141   PetscFunctionBegin;
  /* communicate and apply all updates queued by MatSetValues_Elemental() */
  a->emat->ProcessQueues();
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 PetscErrorCode MatAssemblyEnd_Elemental(Mat A, MatAssemblyType type)
1149 {
1150   PetscFunctionBegin;
1151   /* Currently does nothing */
1152   PetscFunctionReturn(0);
1153 }
1154 
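/*
   MatLoad_Elemental - loads the matrix by reading it into an intermediate MATDENSE
   matrix with MatLoad() and converting, since no native Elemental reader is implemented here.
*/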
1155 PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer)
1156 {
1157   Mat            Adense,Ae;
1158   MPI_Comm       comm;
1159 
1160   PetscFunctionBegin;
1161   PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
1162   PetscCall(MatCreate(comm,&Adense));
1163   PetscCall(MatSetType(Adense,MATDENSE));
1164   PetscCall(MatLoad(Adense,viewer));
1165   PetscCall(MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX,&Ae));
1166   PetscCall(MatDestroy(&Adense));
1167   PetscCall(MatHeaderReplace(newMat,&Ae));
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 /* -------------------------------------------------------------------*/
1172 static struct _MatOps MatOps_Values = {
1173        MatSetValues_Elemental,
1174        0,
1175        0,
1176        MatMult_Elemental,
1177 /* 4*/ MatMultAdd_Elemental,
1178        MatMultTranspose_Elemental,
1179        MatMultTransposeAdd_Elemental,
1180        MatSolve_Elemental,
1181        MatSolveAdd_Elemental,
1182        0,
1183 /*10*/ 0,
1184        MatLUFactor_Elemental,
1185        MatCholeskyFactor_Elemental,
1186        0,
1187        MatTranspose_Elemental,
1188 /*15*/ MatGetInfo_Elemental,
1189        0,
1190        MatGetDiagonal_Elemental,
1191        MatDiagonalScale_Elemental,
1192        MatNorm_Elemental,
1193 /*20*/ MatAssemblyBegin_Elemental,
1194        MatAssemblyEnd_Elemental,
1195        MatSetOption_Elemental,
1196        MatZeroEntries_Elemental,
1197 /*24*/ 0,
1198        MatLUFactorSymbolic_Elemental,
1199        MatLUFactorNumeric_Elemental,
1200        MatCholeskyFactorSymbolic_Elemental,
1201        MatCholeskyFactorNumeric_Elemental,
1202 /*29*/ MatSetUp_Elemental,
1203        0,
1204        0,
1205        0,
1206        0,
1207 /*34*/ MatDuplicate_Elemental,
1208        0,
1209        0,
1210        0,
1211        0,
1212 /*39*/ MatAXPY_Elemental,
1213        0,
1214        0,
1215        0,
1216        MatCopy_Elemental,
1217 /*44*/ 0,
1218        MatScale_Elemental,
1219        MatShift_Basic,
1220        0,
1221        0,
1222 /*49*/ 0,
1223        0,
1224        0,
1225        0,
1226        0,
1227 /*54*/ 0,
1228        0,
1229        0,
1230        0,
1231        0,
1232 /*59*/ 0,
1233        MatDestroy_Elemental,
1234        MatView_Elemental,
1235        0,
1236        0,
1237 /*64*/ 0,
1238        0,
1239        0,
1240        0,
1241        0,
1242 /*69*/ 0,
1243        0,
1244        MatConvert_Elemental_Dense,
1245        0,
1246        0,
1247 /*74*/ 0,
1248        0,
1249        0,
1250        0,
1251        0,
1252 /*79*/ 0,
1253        0,
1254        0,
1255        0,
1256        MatLoad_Elemental,
1257 /*84*/ 0,
1258        0,
1259        0,
1260        0,
1261        0,
1262 /*89*/ 0,
1263        0,
1264        MatMatMultNumeric_Elemental,
1265        0,
1266        0,
1267 /*94*/ 0,
1268        0,
1269        0,
1270        MatMatTransposeMultNumeric_Elemental,
1271        0,
1272 /*99*/ MatProductSetFromOptions_Elemental,
1273        0,
1274        0,
1275        MatConjugate_Elemental,
1276        0,
1277 /*104*/0,
1278        0,
1279        0,
1280        0,
1281        0,
1282 /*109*/MatMatSolve_Elemental,
1283        0,
1284        0,
1285        0,
1286        MatMissingDiagonal_Elemental,
1287 /*114*/0,
1288        0,
1289        0,
1290        0,
1291        0,
1292 /*119*/0,
1293        MatHermitianTranspose_Elemental,
1294        0,
1295        0,
1296        0,
1297 /*124*/0,
1298        0,
1299        0,
1300        0,
1301        0,
1302 /*129*/0,
1303        0,
1304        0,
1305        0,
1306        0,
1307 /*134*/0,
1308        0,
1309        0,
1310        0,
1311        0,
1312        0,
1313 /*140*/0,
1314        0,
1315        0,
1316        0,
1317        0,
1318 /*145*/0,
1319        0,
1320        0,
1321        0,
1322        0
1323 };
1324 
1325 /*MC
1326    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package
1327 
1328   Use ./configure --download-elemental to install PETSc to use Elemental
1329 
  Options Database Keys:
1333 + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1334 . -pc_factor_mat_solver_type elemental - to use this direct solver with the option -pc_type lu
- -mat_elemental_grid_height - sets the grid height of the 2D cyclic ordering of the internal matrix
1336 
1337   Level: beginner
1338 
  Note:
   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine which entries are stored on
   the given rank.
1343 
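  Example Usage:
   A minimal sketch (assumes PETSc was configured with --download-elemental; error checking
   omitted, and the global sizes M and N as well as the entry (row,col,v) are placeholders):
.vb
   Mat         A;
   PetscInt    row = 0, col = 0;
   PetscScalar v   = 1.0;

   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATELEMENTAL);
   MatSetUp(A);
   MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES); /* off-process entries require ADD_VALUES */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve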
1344 .seealso: `MATDENSE`, `MATSCALAPACK`, `MatGetOwnershipIS()`
1345 M*/
1346 
1347 PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A)
1348 {
1349   Mat_Elemental      *a;
1350   PetscBool          flg,flg1;
1351   Mat_Elemental_Grid *commgrid;
1352   MPI_Comm           icomm;
1353   PetscInt           optv1;
1354 
1355   PetscFunctionBegin;
1356   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
1357   A->insertmode = NOT_SET_VALUES;
1358 
1359   PetscCall(PetscNewLog(A,&a));
1360   A->data = (void*)a;
1361 
1362   /* Set up the elemental matrix */
1363   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1364 
1365   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1366   if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
1367     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_Elemental_keyval,(void*)0));
1368     PetscCall(PetscCitationsRegister(ElementalCitation,&ElementalCite));
1369   }
1370   PetscCall(PetscCommDuplicate(cxxcomm.comm,&icomm,NULL));
1371   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_Elemental_keyval,(void**)&commgrid,(int*)&flg));
1372   if (!flg) {
1373     PetscCall(PetscNewLog(A,&commgrid));
1374 
1375     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"Elemental Options","Mat");
    /* the displayed default grid sizes (CommSize,1) are placeholders; El::Grid() chooses the actual defaults */
1377     PetscCall(PetscOptionsInt("-mat_elemental_grid_height","Grid Height","None",El::mpi::Size(cxxcomm),&optv1,&flg1));
1378     if (flg1) {
1379       PetscCheck((El::mpi::Size(cxxcomm) % optv1) == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %" PetscInt_FMT,optv1,(PetscInt)El::mpi::Size(cxxcomm));
1380       commgrid->grid = new El::Grid(cxxcomm,optv1); /* use user-provided grid height */
1381     } else {
1382       commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
1384     }
1385     commgrid->grid_refct = 1;
1386     PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_Elemental_keyval,(void*)commgrid));
1387 
1388     a->pivoting    = 1;
1389     PetscCall(PetscOptionsInt("-mat_elemental_pivoting","Pivoting","None",a->pivoting,&a->pivoting,NULL));
1390 
1391     PetscOptionsEnd();
1392   } else {
1393     commgrid->grid_refct++;
1394   }
1395   PetscCall(PetscCommDestroy(&icomm));
1396   a->grid        = commgrid->grid;
1397   a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
1398   a->roworiented = PETSC_TRUE;
1399 
1400   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_Elemental));
1401   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_elemental_mpidense_C",MatProductSetFromOptions_Elemental_MPIDense));
1402   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATELEMENTAL));
1403   PetscFunctionReturn(0);
1404 }
1405