xref: /petsc/src/mat/impls/elemental/matelem.cxx (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 #include <petsc/private/petscelemental.h>
2 
3 const char ElementalCitation[] = "@Article{Elemental2012,\n"
4 "  author  = {Jack Poulson and Bryan Marker and Jeff R. Hammond and Nichols A. Romero and Robert {v}an~{d}e~{G}eijn},\n"
5 "  title   = {Elemental: A New Framework for Distributed Memory Dense Matrix Computations},\n"
6 "  journal = {{ACM} Transactions on Mathematical Software},\n"
7 "  volume  = {39},\n"
8 "  number  = {2},\n"
9 "  year    = {2013}\n"
10 "}\n";
11 static PetscBool ElementalCite = PETSC_FALSE;
12 
13 /*
14     Petsc_Elemental_keyval identifies an MPI attribute that is attached to a communicator;
15   the attribute's value is a Mat_Elemental_Grid shared by all Elemental matrices created on that communicator
16 */
17 static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;
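/*
    How the grid is shared (outline of the pattern used in MatCreate_Elemental() and MatDestroy_Elemental() below):
      - the keyval is created once, the first time a MATELEMENTAL matrix is created
      - MPI_Comm_get_attr() looks for a cached Mat_Elemental_Grid on the (duplicated) communicator
      - if none is found, a new El::Grid is allocated, stored with MPI_Comm_set_attr(), and grid_refct is set to 1
      - later matrices on the same communicator reuse the cached grid and increment grid_refct
      - MatDestroy_Elemental() decrements grid_refct and, when it reaches zero, deletes the grid and frees the keyval
*/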
18 
19 static PetscErrorCode MatView_Elemental(Mat A,PetscViewer viewer)
20 {
21   Mat_Elemental  *a = (Mat_Elemental*)A->data;
22   PetscBool      iascii;
23 
24   PetscFunctionBegin;
25   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
26   if (iascii) {
27     PetscViewerFormat format;
28     PetscCall(PetscViewerGetFormat(viewer,&format));
29     if (format == PETSC_VIEWER_ASCII_INFO) {
30       /* call elemental viewing function */
31       PetscCall(PetscViewerASCIIPrintf(viewer,"Elemental run parameters:\n"));
32       PetscCall(PetscViewerASCIIPrintf(viewer,"  allocated entries=%zu\n",(*a->emat).AllocatedMemory()));
33       PetscCall(PetscViewerASCIIPrintf(viewer,"  grid height=%d, grid width=%d\n",(*a->emat).Grid().Height(),(*a->emat).Grid().Width()));
38 
39     } else if (format == PETSC_VIEWER_DEFAULT) {
40       PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
41       El::Print( *a->emat, "Elemental matrix (cyclic ordering)");
42       PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
43       if (A->factortype == MAT_FACTOR_NONE) {
44         Mat Adense;
45         PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
46         PetscCall(MatView(Adense,viewer));
47         PetscCall(MatDestroy(&Adense));
48       }
49     } else SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Unsupported viewer format");
50   } else {
51     /* convert to dense format and call MatView() */
52     Mat Adense;
53     PetscCall(MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense));
54     PetscCall(MatView(Adense,viewer));
55     PetscCall(MatDestroy(&Adense));
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 static PetscErrorCode MatGetInfo_Elemental(Mat A,MatInfoType flag,MatInfo *info)
61 {
62   Mat_Elemental  *a = (Mat_Elemental*)A->data;
63 
64   PetscFunctionBegin;
65   info->block_size = 1.0;
66 
67   if (flag == MAT_LOCAL) {
68     info->nz_allocated   = (*a->emat).AllocatedMemory(); /* locally allocated */
69     info->nz_used        = info->nz_allocated;
70   } else if (flag == MAT_GLOBAL_MAX) {
71     //PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin)));
72     /* see MatGetInfo_MPIAIJ() for getting global info->nz_allocated! */
73     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_MAX not written yet");
74   } else if (flag == MAT_GLOBAL_SUM) {
75     //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_SUM not written yet");
76     info->nz_allocated   = (*a->emat).AllocatedMemory(); /* locally allocated */
77     info->nz_used        = info->nz_allocated; /* assume Elemental does accurate allocation */
78     //PetscCall(MPIU_Allreduce(isend,irecv,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
79     //PetscPrintf(PETSC_COMM_SELF,"    ... [%d] locally allocated %g\n",rank,info->nz_allocated);
80   }
81 
82   info->nz_unneeded       = 0.0;
83   info->assemblies        = A->num_ass;
84   info->mallocs           = 0;
85   info->memory            = ((PetscObject)A)->mem;
86   info->fill_ratio_given  = 0; /* determined by Elemental */
87   info->fill_ratio_needed = 0;
88   info->factor_mallocs    = 0;
89   PetscFunctionReturn(0);
90 }
91 
92 PetscErrorCode MatSetOption_Elemental(Mat A,MatOption op,PetscBool flg)
93 {
94   Mat_Elemental  *a = (Mat_Elemental*)A->data;
95 
96   PetscFunctionBegin;
97   switch (op) {
98   case MAT_NEW_NONZERO_LOCATIONS:
99   case MAT_NEW_NONZERO_LOCATION_ERR:
100   case MAT_NEW_NONZERO_ALLOCATION_ERR:
101   case MAT_SYMMETRIC:
102   case MAT_SORTED_FULL:
103   case MAT_HERMITIAN:
104     break;
105   case MAT_ROW_ORIENTED:
106     a->roworiented = flg;
107     break;
108   default:
109     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
110   }
111   PetscFunctionReturn(0);
112 }
113 
114 static PetscErrorCode MatSetValues_Elemental(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
115 {
116   Mat_Elemental  *a = (Mat_Elemental*)A->data;
117   PetscInt       i,j,rrank,ridx,crank,cidx,erow,ecol,numQueues=0;
118 
119   PetscFunctionBegin;
120   // TODO: Initialize matrix to all zeros?
121 
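  /*
     Two-pass insertion: each PETSc (row,col) pair is translated to Elemental's element-cyclic ordering
     with P2RO() (PETSc -> (rank,offset)) and RO2E() ((rank,offset) -> Elemental).  Entries that are local
     to this process are Set()/Update()'d immediately; off-process entries are only counted on the first
     pass, then Reserve() is called with that count and the entries are queued with QueueUpdate() on the
     second pass.  The queues are flushed collectively by ProcessQueues() in MatAssemblyBegin_Elemental(),
     which is why only ADD_VALUES is supported for off-process entries.
  */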
122   // Count the number of queues from this process
123   if (a->roworiented) {
124     for (i=0; i<nr; i++) {
125       if (rows[i] < 0) continue;
126       P2RO(A,0,rows[i],&rrank,&ridx);
127       RO2E(A,0,rrank,ridx,&erow);
128       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect row translation");
129       for (j=0; j<nc; j++) {
130         if (cols[j] < 0) continue;
131         P2RO(A,1,cols[j],&crank,&cidx);
132         RO2E(A,1,crank,cidx,&ecol);
133         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect col translation");
134         if (!a->emat->IsLocal(erow,ecol)) { /* off-proc entry */
135           /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
136           PetscCheck(imode == ADD_VALUES,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only ADD_VALUES to off-processor entry is supported");
137           ++numQueues;
138           continue;
139         }
140         /* printf("Locally updating (%d,%d)\n",erow,ecol); */
141         switch (imode) {
142         case INSERT_VALUES: a->emat->Set(erow,ecol,(PetscElemScalar)vals[i*nc+j]); break;
143         case ADD_VALUES: a->emat->Update(erow,ecol,(PetscElemScalar)vals[i*nc+j]); break;
144         default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
145         }
146       }
147     }
148 
149     /* printf("numQueues=%d\n",numQueues); */
150     a->emat->Reserve( numQueues);
151     for (i=0; i<nr; i++) {
152       if (rows[i] < 0) continue;
153       P2RO(A,0,rows[i],&rrank,&ridx);
154       RO2E(A,0,rrank,ridx,&erow);
155       for (j=0; j<nc; j++) {
156         if (cols[j] < 0) continue;
157         P2RO(A,1,cols[j],&crank,&cidx);
158         RO2E(A,1,crank,cidx,&ecol);
159         if (!a->emat->IsLocal(erow,ecol)) { /*off-proc entry*/
160           /* printf("Queueing remote update of (%d,%d)\n",erow,ecol); */
161           a->emat->QueueUpdate( erow, ecol, vals[i*nc+j]);
162         }
163       }
164     }
165   } else { /* column-oriented */
166     for (j=0; j<nc; j++) {
167       if (cols[j] < 0) continue;
168       P2RO(A,1,cols[j],&crank,&cidx);
169       RO2E(A,1,crank,cidx,&ecol);
170       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect col translation");
171       for (i=0; i<nr; i++) {
172         if (rows[i] < 0) continue;
173         P2RO(A,0,rows[i],&rrank,&ridx);
174         RO2E(A,0,rrank,ridx,&erow);
175         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect row translation");
176         if (!a->emat->IsLocal(erow,ecol)) { /* off-proc entry */
177           /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
178           PetscCheck(imode == ADD_VALUES,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only ADD_VALUES to off-processor entry is supported");
179           ++numQueues;
180           continue;
181         }
182         /* printf("Locally updating (%d,%d)\n",erow,ecol); */
183         switch (imode) {
184         case INSERT_VALUES: a->emat->Set(erow,ecol,(PetscElemScalar)vals[i+j*nr]); break;
185         case ADD_VALUES: a->emat->Update(erow,ecol,(PetscElemScalar)vals[i+j*nr]); break;
186         default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
187         }
188       }
189     }
190 
191     /* printf("numQueues=%d\n",numQueues); */
192     a->emat->Reserve( numQueues);
193     for (j=0; j<nc; j++) {
194       if (cols[j] < 0) continue;
195       P2RO(A,1,cols[j],&crank,&cidx);
196       RO2E(A,1,crank,cidx,&ecol);
197 
198       for (i=0; i<nr; i++) {
199         if (rows[i] < 0) continue;
200         P2RO(A,0,rows[i],&rrank,&ridx);
201         RO2E(A,0,rrank,ridx,&erow);
202         if (!a->emat->IsLocal(erow,ecol)) { /*off-proc entry*/
203           /* printf("Queueing remote update of (%d,%d)\n",erow,ecol); */
204           a->emat->QueueUpdate( erow, ecol, vals[i+j*nr]);
205         }
206       }
207     }
208   }
209   PetscFunctionReturn(0);
210 }
211 
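/*
   The multiplication routines below wrap the PETSc vector arrays in Elemental [VC,STAR] distributed
   matrices without copying: LockedAttach() is used for the read-only input vector and Attach() for the
   output vector, with the PETSc local length as the local leading dimension.  The extra scope ensures
   the El::DistMatrix wrappers are destroyed before VecRestoreArray()/VecRestoreArrayRead() are called.
*/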
212 static PetscErrorCode MatMult_Elemental(Mat A,Vec X,Vec Y)
213 {
214   Mat_Elemental         *a = (Mat_Elemental*)A->data;
215   const PetscElemScalar *x;
216   PetscElemScalar       *y;
217   PetscElemScalar       one = 1,zero = 0;
218 
219   PetscFunctionBegin;
220   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
221   PetscCall(VecGetArray(Y,(PetscScalar **)&y));
222   { /* Scope so that the El::DistMatrix wrappers are destroyed before the Vec arrays are restored */
223     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ye;
224     xe.LockedAttach(A->cmap->N,1,*a->grid,0,0,x,A->cmap->n);
225     ye.Attach(A->rmap->N,1,*a->grid,0,0,y,A->rmap->n);
226     El::Gemv(El::NORMAL,one,*a->emat,xe,zero,ye);
227   }
228   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
229   PetscCall(VecRestoreArray(Y,(PetscScalar **)&y));
230   PetscFunctionReturn(0);
231 }
232 
233 static PetscErrorCode MatMultTranspose_Elemental(Mat A,Vec X,Vec Y)
234 {
235   Mat_Elemental         *a = (Mat_Elemental*)A->data;
236   const PetscElemScalar *x;
237   PetscElemScalar       *y;
238   PetscElemScalar       one = 1,zero = 0;
239 
240   PetscFunctionBegin;
241   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
242   PetscCall(VecGetArray(Y,(PetscScalar **)&y));
243   { /* Scope so that the El::DistMatrix wrappers are destroyed before the Vec arrays are restored */
244     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ye;
245     xe.LockedAttach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n);
246     ye.Attach(A->cmap->N,1,*a->grid,0,0,y,A->cmap->n);
247     El::Gemv(El::TRANSPOSE,one,*a->emat,xe,zero,ye);
248   }
249   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
250   PetscCall(VecRestoreArray(Y,(PetscScalar **)&y));
251   PetscFunctionReturn(0);
252 }
253 
254 static PetscErrorCode MatMultAdd_Elemental(Mat A,Vec X,Vec Y,Vec Z)
255 {
256   Mat_Elemental         *a = (Mat_Elemental*)A->data;
257   const PetscElemScalar *x;
258   PetscElemScalar       *z;
259   PetscElemScalar       one = 1;
260 
261   PetscFunctionBegin;
262   if (Y != Z) PetscCall(VecCopy(Y,Z));
263   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
264   PetscCall(VecGetArray(Z,(PetscScalar **)&z));
265   { /* Scope so that the El::DistMatrix wrappers are destroyed before the Vec arrays are restored */
266     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ze;
267     xe.LockedAttach(A->cmap->N,1,*a->grid,0,0,x,A->cmap->n);
268     ze.Attach(A->rmap->N,1,*a->grid,0,0,z,A->rmap->n);
269     El::Gemv(El::NORMAL,one,*a->emat,xe,one,ze);
270   }
271   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
272   PetscCall(VecRestoreArray(Z,(PetscScalar **)&z));
273   PetscFunctionReturn(0);
274 }
275 
276 static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A,Vec X,Vec Y,Vec Z)
277 {
278   Mat_Elemental         *a = (Mat_Elemental*)A->data;
279   const PetscElemScalar *x;
280   PetscElemScalar       *z;
281   PetscElemScalar       one = 1;
282 
283   PetscFunctionBegin;
284   if (Y != Z) PetscCall(VecCopy(Y,Z));
285   PetscCall(VecGetArrayRead(X,(const PetscScalar **)&x));
286   PetscCall(VecGetArray(Z,(PetscScalar **)&z));
287   { /* Scope so that the El::DistMatrix wrappers are destroyed before the Vec arrays are restored */
288     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ze;
289     xe.LockedAttach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n);
290     ze.Attach(A->cmap->N,1,*a->grid,0,0,z,A->cmap->n);
291     El::Gemv(El::TRANSPOSE,one,*a->emat,xe,one,ze);
292   }
293   PetscCall(VecRestoreArrayRead(X,(const PetscScalar **)&x));
294   PetscCall(VecRestoreArray(Z,(PetscScalar **)&z));
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatMatMultNumeric_Elemental(Mat A,Mat B,Mat C)
299 {
300   Mat_Elemental    *a = (Mat_Elemental*)A->data;
301   Mat_Elemental    *b = (Mat_Elemental*)B->data;
302   Mat_Elemental    *c = (Mat_Elemental*)C->data;
303   PetscElemScalar  one = 1,zero = 0;
304 
305   PetscFunctionBegin;
306   { /* Scoping so that constructor is called before pointer is returned */
307     El::Gemm(El::NORMAL,El::NORMAL,one,*a->emat,*b->emat,zero,*c->emat);
308   }
309   C->assembled = PETSC_TRUE;
310   PetscFunctionReturn(0);
311 }
312 
313 PetscErrorCode MatMatMultSymbolic_Elemental(Mat A,Mat B,PetscReal fill,Mat Ce)
314 {
315   PetscFunctionBegin;
316   PetscCall(MatSetSizes(Ce,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
317   PetscCall(MatSetType(Ce,MATELEMENTAL));
318   PetscCall(MatSetUp(Ce));
319   Ce->ops->matmultnumeric = MatMatMultNumeric_Elemental;
320   PetscFunctionReturn(0);
321 }
322 
323 static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A,Mat B,Mat C)
324 {
325   Mat_Elemental      *a = (Mat_Elemental*)A->data;
326   Mat_Elemental      *b = (Mat_Elemental*)B->data;
327   Mat_Elemental      *c = (Mat_Elemental*)C->data;
328   PetscElemScalar    one = 1,zero = 0;
329 
330   PetscFunctionBegin;
331   { /* Scoping so that constructor is called before pointer is returned */
332     El::Gemm(El::NORMAL,El::TRANSPOSE,one,*a->emat,*b->emat,zero,*c->emat);
333   }
334   C->assembled = PETSC_TRUE;
335   PetscFunctionReturn(0);
336 }
337 
338 static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A,Mat B,PetscReal fill,Mat C)
339 {
340   PetscFunctionBegin;
341   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
342   PetscCall(MatSetType(C,MATELEMENTAL));
343   PetscCall(MatSetUp(C));
344   PetscFunctionReturn(0);
345 }
346 
347 /* --------------------------------------- */
348 static PetscErrorCode MatProductSetFromOptions_Elemental_AB(Mat C)
349 {
350   PetscFunctionBegin;
351   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental;
352   C->ops->productsymbolic = MatProductSymbolic_AB;
353   PetscFunctionReturn(0);
354 }
355 
356 static PetscErrorCode MatProductSetFromOptions_Elemental_ABt(Mat C)
357 {
358   PetscFunctionBegin;
359   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_Elemental;
360   C->ops->productsymbolic          = MatProductSymbolic_ABt;
361   PetscFunctionReturn(0);
362 }
363 
364 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Elemental(Mat C)
365 {
366   Mat_Product    *product = C->product;
367 
368   PetscFunctionBegin;
369   switch (product->type) {
370   case MATPRODUCT_AB:
371     PetscCall(MatProductSetFromOptions_Elemental_AB(C));
372     break;
373   case MATPRODUCT_ABt:
374     PetscCall(MatProductSetFromOptions_Elemental_ABt(C));
375     break;
376   default:
377     break;
378   }
379   PetscFunctionReturn(0);
380 }
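
/*
   A minimal usage sketch (not taken from this file; A and B stand for assembled MATELEMENTAL matrices of
   compatible sizes): the routines above are reached through the generic MatProduct interface, e.g.

     Mat C;
     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_AB));   // or MATPRODUCT_ABt
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));

   or simply through PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C)).
*/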
381 
382 PetscErrorCode MatMatMultNumeric_Elemental_MPIDense(Mat A,Mat B,Mat C)
383 {
384   Mat            Be,Ce;
385 
386   PetscFunctionBegin;
387   PetscCall(MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&Be));
388   PetscCall(MatMatMult(A,Be,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ce));
389   PetscCall(MatConvert(Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
390   PetscCall(MatDestroy(&Be));
391   PetscCall(MatDestroy(&Ce));
392   PetscFunctionReturn(0);
393 }
394 
395 PetscErrorCode MatMatMultSymbolic_Elemental_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
396 {
397   PetscFunctionBegin;
398   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
399   PetscCall(MatSetType(C,MATMPIDENSE));
400   PetscCall(MatSetUp(C));
401   C->ops->matmultnumeric = MatMatMultNumeric_Elemental_MPIDense;
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense_AB(Mat C)
406 {
407   PetscFunctionBegin;
408   C->ops->matmultsymbolic = MatMatMultSymbolic_Elemental_MPIDense;
409   C->ops->productsymbolic = MatProductSymbolic_AB;
410   PetscFunctionReturn(0);
411 }
412 
413 PetscErrorCode MatProductSetFromOptions_Elemental_MPIDense(Mat C)
414 {
415   Mat_Product    *product = C->product;
416 
417   PetscFunctionBegin;
418   if (product->type == MATPRODUCT_AB) {
419     PetscCall(MatProductSetFromOptions_Elemental_MPIDense_AB(C));
420   }
421   PetscFunctionReturn(0);
422 }
423 /* --------------------------------------- */
424 
425 static PetscErrorCode MatGetDiagonal_Elemental(Mat A,Vec D)
426 {
427   PetscInt        i,nrows,ncols,nD,rrank,ridx,crank,cidx;
428   Mat_Elemental   *a = (Mat_Elemental*)A->data;
429   PetscElemScalar v;
430   MPI_Comm        comm;
431 
432   PetscFunctionBegin;
433   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
434   PetscCall(MatGetSize(A,&nrows,&ncols));
435   nD = nrows>ncols ? ncols : nrows;
436   for (i=0; i<nD; i++) {
437     PetscInt erow,ecol;
438     P2RO(A,0,i,&rrank,&ridx);
439     RO2E(A,0,rrank,ridx,&erow);
440     PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,comm,PETSC_ERR_PLIB,"Incorrect row translation");
441     P2RO(A,1,i,&crank,&cidx);
442     RO2E(A,1,crank,cidx,&ecol);
443     PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,comm,PETSC_ERR_PLIB,"Incorrect col translation");
444     v = a->emat->Get(erow,ecol);
445     PetscCall(VecSetValues(D,1,&i,(PetscScalar*)&v,INSERT_VALUES));
446   }
447   PetscCall(VecAssemblyBegin(D));
448   PetscCall(VecAssemblyEnd(D));
449   PetscFunctionReturn(0);
450 }
451 
452 static PetscErrorCode MatDiagonalScale_Elemental(Mat X,Vec L,Vec R)
453 {
454   Mat_Elemental         *x = (Mat_Elemental*)X->data;
455   const PetscElemScalar *d;
456 
457   PetscFunctionBegin;
458   if (R) {
459     PetscCall(VecGetArrayRead(R,(const PetscScalar **)&d));
460     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> de;
461     de.LockedAttach(X->cmap->N,1,*x->grid,0,0,d,X->cmap->n);
462     El::DiagonalScale(El::RIGHT,El::NORMAL,de,*x->emat);
463     PetscCall(VecRestoreArrayRead(R,(const PetscScalar **)&d));
464   }
465   if (L) {
466     PetscCall(VecGetArrayRead(L,(const PetscScalar **)&d));
467     El::DistMatrix<PetscElemScalar,El::VC,El::STAR> de;
468     de.LockedAttach(X->rmap->N,1,*x->grid,0,0,d,X->rmap->n);
469     El::DiagonalScale(El::LEFT,El::NORMAL,de,*x->emat);
470     PetscCall(VecRestoreArrayRead(L,(const PetscScalar **)&d));
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 static PetscErrorCode MatMissingDiagonal_Elemental(Mat A,PetscBool *missing,PetscInt *d)
476 {
477   PetscFunctionBegin;
478   *missing = PETSC_FALSE;
479   PetscFunctionReturn(0);
480 }
481 
482 static PetscErrorCode MatScale_Elemental(Mat X,PetscScalar a)
483 {
484   Mat_Elemental  *x = (Mat_Elemental*)X->data;
485 
486   PetscFunctionBegin;
487   El::Scale((PetscElemScalar)a,*x->emat);
488   PetscFunctionReturn(0);
489 }
490 
491 /*
492   MatAXPY - Computes Y = a*X + Y.
493 */
494 static PetscErrorCode MatAXPY_Elemental(Mat Y,PetscScalar a,Mat X,MatStructure str)
495 {
496   Mat_Elemental  *x = (Mat_Elemental*)X->data;
497   Mat_Elemental  *y = (Mat_Elemental*)Y->data;
498 
499   PetscFunctionBegin;
500   El::Axpy((PetscElemScalar)a,*x->emat,*y->emat);
501   PetscCall(PetscObjectStateIncrease((PetscObject)Y));
502   PetscFunctionReturn(0);
503 }
504 
505 static PetscErrorCode MatCopy_Elemental(Mat A,Mat B,MatStructure str)
506 {
507   Mat_Elemental *a=(Mat_Elemental*)A->data;
508   Mat_Elemental *b=(Mat_Elemental*)B->data;
509 
510   PetscFunctionBegin;
511   El::Copy(*a->emat,*b->emat);
512   PetscCall(PetscObjectStateIncrease((PetscObject)B));
513   PetscFunctionReturn(0);
514 }
515 
516 static PetscErrorCode MatDuplicate_Elemental(Mat A,MatDuplicateOption op,Mat *B)
517 {
518   Mat            Be;
519   MPI_Comm       comm;
520   Mat_Elemental  *a=(Mat_Elemental*)A->data;
521 
522   PetscFunctionBegin;
523   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
524   PetscCall(MatCreate(comm,&Be));
525   PetscCall(MatSetSizes(Be,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
526   PetscCall(MatSetType(Be,MATELEMENTAL));
527   PetscCall(MatSetUp(Be));
528   *B = Be;
529   if (op == MAT_COPY_VALUES) {
530     Mat_Elemental *b=(Mat_Elemental*)Be->data;
531     El::Copy(*a->emat,*b->emat);
532   }
533   Be->assembled = PETSC_TRUE;
534   PetscFunctionReturn(0);
535 }
536 
537 static PetscErrorCode MatTranspose_Elemental(Mat A,MatReuse reuse,Mat *B)
538 {
539   Mat            Be = *B;
540   MPI_Comm       comm;
541   Mat_Elemental  *a = (Mat_Elemental*)A->data, *b;
542 
543   PetscFunctionBegin;
544   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
545   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
546   /* Only out-of-place supported */
547   PetscCheck(reuse != MAT_INPLACE_MATRIX,comm,PETSC_ERR_SUP,"Only out-of-place supported");
548   if (reuse == MAT_INITIAL_MATRIX) {
549     PetscCall(MatCreate(comm,&Be));
550     PetscCall(MatSetSizes(Be,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
551     PetscCall(MatSetType(Be,MATELEMENTAL));
552     PetscCall(MatSetUp(Be));
553     *B = Be;
554   }
555   b = (Mat_Elemental*)Be->data;
556   El::Transpose(*a->emat,*b->emat);
557   Be->assembled = PETSC_TRUE;
558   PetscFunctionReturn(0);
559 }
560 
561 static PetscErrorCode MatConjugate_Elemental(Mat A)
562 {
563   Mat_Elemental  *a = (Mat_Elemental*)A->data;
564 
565   PetscFunctionBegin;
566   El::Conjugate(*a->emat);
567   PetscFunctionReturn(0);
568 }
569 
570 static PetscErrorCode MatHermitianTranspose_Elemental(Mat A,MatReuse reuse,Mat *B)
571 {
572   Mat            Be = *B;
573   MPI_Comm       comm;
574   Mat_Elemental  *a = (Mat_Elemental*)A->data, *b;
575 
576   PetscFunctionBegin;
577   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
578   PetscCheck(reuse != MAT_INPLACE_MATRIX,comm,PETSC_ERR_SUP,"Only out-of-place supported");
579   if (reuse == MAT_INITIAL_MATRIX) {
580     PetscCall(MatCreate(comm,&Be));
581     PetscCall(MatSetSizes(Be,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE));
582     PetscCall(MatSetType(Be,MATELEMENTAL));
583     PetscCall(MatSetUp(Be));
584     *B = Be;
585   }
586   b = (Mat_Elemental*)Be->data;
587   El::Adjoint(*a->emat,*b->emat);
588   Be->assembled = PETSC_TRUE;
589   PetscFunctionReturn(0);
590 }
591 
592 static PetscErrorCode MatSolve_Elemental(Mat A,Vec B,Vec X)
593 {
594   Mat_Elemental     *a = (Mat_Elemental*)A->data;
595   PetscElemScalar   *x;
596   PetscInt          pivoting = a->pivoting;
597 
598   PetscFunctionBegin;
599   PetscCall(VecCopy(B,X));
600   PetscCall(VecGetArray(X,(PetscScalar **)&x));
601 
602   El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe;
603   xe.Attach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n);
604   El::DistMatrix<PetscElemScalar,El::MC,El::MR> xer(xe);
605   switch (A->factortype) {
606   case MAT_FACTOR_LU:
607     if (pivoting == 0) {
608       El::lu::SolveAfter(El::NORMAL,*a->emat,xer);
609     } else if (pivoting == 1) {
610       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,xer);
611     } else { /* pivoting == 2 */
612       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*a->Q,xer);
613     }
614     break;
615   case MAT_FACTOR_CHOLESKY:
616     El::cholesky::SolveAfter(El::UPPER,El::NORMAL,*a->emat,xer);
617     break;
618   default:
619     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
620     break;
621   }
622   El::Copy(xer,xe);
623 
624   PetscCall(VecRestoreArray(X,(PetscScalar **)&x));
625   PetscFunctionReturn(0);
626 }
627 
628 static PetscErrorCode MatSolveAdd_Elemental(Mat A,Vec B,Vec Y,Vec X)
629 {
630   PetscFunctionBegin;
631   PetscCall(MatSolve_Elemental(A,B,X));
632   PetscCall(VecAXPY(X,1,Y));
633   PetscFunctionReturn(0);
634 }
635 
636 static PetscErrorCode MatMatSolve_Elemental(Mat A,Mat B,Mat X)
637 {
638   Mat_Elemental  *a = (Mat_Elemental*)A->data;
639   Mat_Elemental  *x;
640   Mat            C;
641   PetscInt       pivoting = a->pivoting;
642   PetscBool      flg;
643   MatType        type;
644 
645   PetscFunctionBegin;
646   PetscCall(MatGetType(X,&type));
647   PetscCall(PetscStrcmp(type,MATELEMENTAL,&flg));
648   if (!flg) {
649     PetscCall(MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX,&C));
650     x = (Mat_Elemental*)C->data;
651   } else {
652     x = (Mat_Elemental*)X->data;
653     El::Copy(*((Mat_Elemental*)B->data)->emat,*x->emat);
654   }
655   switch (A->factortype) {
656   case MAT_FACTOR_LU:
657     if (pivoting == 0) {
658       El::lu::SolveAfter(El::NORMAL,*a->emat,*x->emat);
659     } else if (pivoting == 1) {
660       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*x->emat);
661     } else {
662       El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*a->Q,*x->emat);
663     }
664     break;
665   case MAT_FACTOR_CHOLESKY:
666     El::cholesky::SolveAfter(El::UPPER,El::NORMAL,*a->emat,*x->emat);
667     break;
668   default:
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
670     break;
671   }
672   if (!flg) {
673     PetscCall(MatConvert(C,type,MAT_REUSE_MATRIX,&X));
674     PetscCall(MatDestroy(&C));
675   }
676   PetscFunctionReturn(0);
677 }
678 
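/*
   a->pivoting (set with -mat_elemental_pivoting, default 1) selects the El::LU() overload used below and
   in the corresponding solve routines: 0 factors without pivoting, 1 also computes the row permutation
   a->P, and 2 computes both permutations a->P and a->Q.
*/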
679 static PetscErrorCode MatLUFactor_Elemental(Mat A,IS row,IS col,const MatFactorInfo *info)
680 {
681   Mat_Elemental  *a = (Mat_Elemental*)A->data;
682   PetscInt       pivoting = a->pivoting;
683 
684   PetscFunctionBegin;
685   if (pivoting == 0) {
686     El::LU(*a->emat);
687   } else if (pivoting == 1) {
688     El::LU(*a->emat,*a->P);
689   } else {
690     El::LU(*a->emat,*a->P,*a->Q);
691   }
692   A->factortype = MAT_FACTOR_LU;
693   A->assembled  = PETSC_TRUE;
694 
695   PetscCall(PetscFree(A->solvertype));
696   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL,&A->solvertype));
697   PetscFunctionReturn(0);
698 }
699 
700 static PetscErrorCode  MatLUFactorNumeric_Elemental(Mat F,Mat A,const MatFactorInfo *info)
701 {
702   PetscFunctionBegin;
703   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
704   PetscCall(MatLUFactor_Elemental(F,0,0,info));
705   PetscFunctionReturn(0);
706 }
707 
708 static PetscErrorCode  MatLUFactorSymbolic_Elemental(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
709 {
710   PetscFunctionBegin;
711   /* F is created and allocated by MatGetFactor_elemental_elemental(), skip this routine. */
712   PetscFunctionReturn(0);
713 }
714 
715 static PetscErrorCode MatCholeskyFactor_Elemental(Mat A,IS perm,const MatFactorInfo *info)
716 {
717   Mat_Elemental  *a = (Mat_Elemental*)A->data;
718   El::DistMatrix<PetscElemScalar,El::MC,El::STAR> d;
719 
720   PetscFunctionBegin;
721   El::Cholesky(El::UPPER,*a->emat);
722   A->factortype = MAT_FACTOR_CHOLESKY;
723   A->assembled  = PETSC_TRUE;
724 
725   PetscCall(PetscFree(A->solvertype));
726   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL,&A->solvertype));
727   PetscFunctionReturn(0);
728 }
729 
730 static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F,Mat A,const MatFactorInfo *info)
731 {
732   PetscFunctionBegin;
733   PetscCall(MatCopy(A,F,SAME_NONZERO_PATTERN));
734   PetscCall(MatCholeskyFactor_Elemental(F,0,info));
735   PetscFunctionReturn(0);
736 }
737 
738 static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat F,Mat A,IS perm,const MatFactorInfo *info)
739 {
740   PetscFunctionBegin;
741   /* F is created and allocated by MatGetFactor_elemental_elemental(), skip this routine. */
742   PetscFunctionReturn(0);
743 }
744 
745 PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat A,MatSolverType *type)
746 {
747   PetscFunctionBegin;
748   *type = MATSOLVERELEMENTAL;
749   PetscFunctionReturn(0);
750 }
751 
752 static PetscErrorCode MatGetFactor_elemental_elemental(Mat A,MatFactorType ftype,Mat *F)
753 {
754   Mat            B;
755 
756   PetscFunctionBegin;
757   /* Create the factorization matrix */
758   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
759   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
760   PetscCall(MatSetType(B,MATELEMENTAL));
761   PetscCall(MatSetUp(B));
762   B->factortype = ftype;
763   B->trivialsymbolic = PETSC_TRUE;
764   PetscCall(PetscFree(B->solvertype));
765   PetscCall(PetscStrallocpy(MATSOLVERELEMENTAL,&B->solvertype));
766 
767   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_elemental_elemental));
768   *F            = B;
769   PetscFunctionReturn(0);
770 }
771 
772 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_Elemental(void)
773 {
774   PetscFunctionBegin;
775   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL,MATELEMENTAL,        MAT_FACTOR_LU,MatGetFactor_elemental_elemental));
776   PetscCall(MatSolverTypeRegister(MATSOLVERELEMENTAL,MATELEMENTAL,        MAT_FACTOR_CHOLESKY,MatGetFactor_elemental_elemental));
777   PetscFunctionReturn(0);
778 }
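
/*
   A minimal usage sketch of the solver registered above (not taken from this file; A is assumed to be an
   assembled square MATELEMENTAL matrix and b, x conforming vectors):

     KSP ksp;
     PetscCall(KSPCreate(PetscObjectComm((PetscObject)A),&ksp));
     PetscCall(KSPSetOperators(ksp,A,A));
     PetscCall(KSPSetFromOptions(ksp));   // run with -pc_type lu -pc_factor_mat_solver_type elemental
     PetscCall(KSPSolve(ksp,b,x));
     PetscCall(KSPDestroy(&ksp));
*/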
779 
780 static PetscErrorCode MatNorm_Elemental(Mat A,NormType type,PetscReal *nrm)
781 {
782   Mat_Elemental *a=(Mat_Elemental*)A->data;
783 
784   PetscFunctionBegin;
785   switch (type) {
786   case NORM_1:
787     *nrm = El::OneNorm(*a->emat);
788     break;
789   case NORM_FROBENIUS:
790     *nrm = El::FrobeniusNorm(*a->emat);
791     break;
792   case NORM_INFINITY:
793     *nrm = El::InfinityNorm(*a->emat);
794     break;
795   default:
796     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
797   }
798   PetscFunctionReturn(0);
799 }
800 
801 static PetscErrorCode MatZeroEntries_Elemental(Mat A)
802 {
803   Mat_Elemental *a=(Mat_Elemental*)A->data;
804 
805   PetscFunctionBegin;
806   El::Zero(*a->emat);
807   PetscFunctionReturn(0);
808 }
809 
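/*
   Because the entries are stored in Elemental's element-cyclic distribution, the rows (columns) held on a
   process are not a contiguous range.  The index sets returned here contain the global PETSc indices of
   the locally stored rows and columns, obtained by walking the local Elemental indices and translating
   them back with E2RO() and RO2P().
*/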
810 static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A,IS *rows,IS *cols)
811 {
812   Mat_Elemental  *a = (Mat_Elemental*)A->data;
813   PetscInt       i,m,shift,stride,*idx;
814 
815   PetscFunctionBegin;
816   if (rows) {
817     m = a->emat->LocalHeight();
818     shift = a->emat->ColShift();
819     stride = a->emat->ColStride();
820     PetscCall(PetscMalloc1(m,&idx));
821     for (i=0; i<m; i++) {
822       PetscInt rank,offset;
823       E2RO(A,0,shift+i*stride,&rank,&offset);
824       RO2P(A,0,rank,offset,&idx[i]);
825     }
826     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,rows));
827   }
828   if (cols) {
829     m = a->emat->LocalWidth();
830     shift = a->emat->RowShift();
831     stride = a->emat->RowStride();
832     PetscCall(PetscMalloc1(m,&idx));
833     for (i=0; i<m; i++) {
834       PetscInt rank,offset;
835       E2RO(A,1,shift+i*stride,&rank,&offset);
836       RO2P(A,1,rank,offset,&idx[i]);
837     }
838     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,cols));
839   }
840   PetscFunctionReturn(0);
841 }
842 
843 static PetscErrorCode MatConvert_Elemental_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
844 {
845   Mat                Bmpi;
846   Mat_Elemental      *a = (Mat_Elemental*)A->data;
847   MPI_Comm           comm;
848   IS                 isrows,iscols;
849   PetscInt           rrank,ridx,crank,cidx,nrows,ncols,i,j,erow,ecol,elrow,elcol;
850   const PetscInt     *rows,*cols;
851   PetscElemScalar    v;
852   const El::Grid     &grid = a->emat->Grid();
853 
854   PetscFunctionBegin;
855   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
856 
857   if (reuse == MAT_REUSE_MATRIX) {
858     Bmpi = *B;
859   } else {
860     PetscCall(MatCreate(comm,&Bmpi));
861     PetscCall(MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
862     PetscCall(MatSetType(Bmpi,MATDENSE));
863     PetscCall(MatSetUp(Bmpi));
864   }
865 
866   /* Get local entries of A */
867   PetscCall(MatGetOwnershipIS(A,&isrows,&iscols));
868   PetscCall(ISGetLocalSize(isrows,&nrows));
869   PetscCall(ISGetIndices(isrows,&rows));
870   PetscCall(ISGetLocalSize(iscols,&ncols));
871   PetscCall(ISGetIndices(iscols,&cols));
872 
873   if (a->roworiented) {
874     for (i=0; i<nrows; i++) {
875       P2RO(A,0,rows[i],&rrank,&ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
876       RO2E(A,0,rrank,ridx,&erow);
877       PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,comm,PETSC_ERR_PLIB,"Incorrect row translation");
878       for (j=0; j<ncols; j++) {
879         P2RO(A,1,cols[j],&crank,&cidx);
880         RO2E(A,1,crank,cidx,&ecol);
881         PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,comm,PETSC_ERR_PLIB,"Incorrect col translation");
882 
883         elrow = erow / grid.MCSize(); /* Elemental local row index */
884         elcol = ecol / grid.MRSize(); /* Elemental local column index */
885         v = a->emat->GetLocal(elrow,elcol);
886         PetscCall(MatSetValues(Bmpi,1,&rows[i],1,&cols[j],(PetscScalar *)&v,INSERT_VALUES));
887       }
888     }
889   } else { /* column-oriented */
890     for (j=0; j<ncols; j++) {
891       P2RO(A,1,cols[j],&crank,&cidx);
892       RO2E(A,1,crank,cidx,&ecol);
893       PetscCheck(crank >= 0 && cidx >= 0 && ecol >= 0,comm,PETSC_ERR_PLIB,"Incorrect col translation");
894       for (i=0; i<nrows; i++) {
895         P2RO(A,0,rows[i],&rrank,&ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
896         RO2E(A,0,rrank,ridx,&erow);
897         PetscCheck(rrank >= 0 && ridx >= 0 && erow >= 0,comm,PETSC_ERR_PLIB,"Incorrect row translation");
898 
899         elrow = erow / grid.MCSize(); /* Elemental local row index */
900         elcol = ecol / grid.MRSize(); /* Elemental local column index */
901         v = a->emat->GetLocal(elrow,elcol);
902         PetscCall(MatSetValues(Bmpi,1,&rows[i],1,&cols[j],(PetscScalar *)&v,INSERT_VALUES));
903       }
904     }
905   }
906   PetscCall(MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY));
907   PetscCall(MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY));
908   if (reuse == MAT_INPLACE_MATRIX) {
909     PetscCall(MatHeaderReplace(A,&Bmpi));
910   } else {
911     *B = Bmpi;
912   }
913   PetscCall(ISDestroy(&isrows));
914   PetscCall(ISDestroy(&iscols));
915   PetscFunctionReturn(0);
916 }
917 
918 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
919 {
920   Mat               mat_elemental;
921   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols;
922   const PetscInt    *cols;
923   const PetscScalar *vals;
924 
925   PetscFunctionBegin;
926   if (reuse == MAT_REUSE_MATRIX) {
927     mat_elemental = *newmat;
928     PetscCall(MatZeroEntries(mat_elemental));
929   } else {
930     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
931     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N));
932     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
933     PetscCall(MatSetUp(mat_elemental));
934   }
935   for (row=0; row<M; row++) {
936     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
937     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
938     PetscCall(MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES));
939     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
940   }
941   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
942   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
943 
944   if (reuse == MAT_INPLACE_MATRIX) {
945     PetscCall(MatHeaderReplace(A,&mat_elemental));
946   } else {
947     *newmat = mat_elemental;
948   }
949   PetscFunctionReturn(0);
950 }
951 
952 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
953 {
954   Mat               mat_elemental;
955   PetscInt          row,ncols,rstart=A->rmap->rstart,rend=A->rmap->rend,j;
956   const PetscInt    *cols;
957   const PetscScalar *vals;
958 
959   PetscFunctionBegin;
960   if (reuse == MAT_REUSE_MATRIX) {
961     mat_elemental = *newmat;
962     PetscCall(MatZeroEntries(mat_elemental));
963   } else {
964     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
965     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
966     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
967     PetscCall(MatSetUp(mat_elemental));
968   }
969   for (row=rstart; row<rend; row++) {
970     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
971     for (j=0; j<ncols; j++) {
972       /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
973       PetscCall(MatSetValues(mat_elemental,1,&row,1,&cols[j],&vals[j],ADD_VALUES));
974     }
975     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
976   }
977   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
978   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
979 
980   if (reuse == MAT_INPLACE_MATRIX) {
981     PetscCall(MatHeaderReplace(A,&mat_elemental));
982   } else {
983     *newmat = mat_elemental;
984   }
985   PetscFunctionReturn(0);
986 }
987 
988 PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
989 {
990   Mat               mat_elemental;
991   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols,j;
992   const PetscInt    *cols;
993   const PetscScalar *vals;
994 
995   PetscFunctionBegin;
996   if (reuse == MAT_REUSE_MATRIX) {
997     mat_elemental = *newmat;
998     PetscCall(MatZeroEntries(mat_elemental));
999   } else {
1000     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1001     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N));
1002     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1003     PetscCall(MatSetUp(mat_elemental));
1004   }
1005   PetscCall(MatGetRowUpperTriangular(A));
1006   for (row=0; row<M; row++) {
1007     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
1008     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1009     PetscCall(MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES));
1010     for (j=0; j<ncols; j++) { /* lower triangular part */
1011       PetscScalar v;
1012       if (cols[j] == row) continue;
1013       v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1014       PetscCall(MatSetValues(mat_elemental,1,&cols[j],1,&row,&v,ADD_VALUES));
1015     }
1016     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1017   }
1018   PetscCall(MatRestoreRowUpperTriangular(A));
1019   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1020   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1021 
1022   if (reuse == MAT_INPLACE_MATRIX) {
1023     PetscCall(MatHeaderReplace(A,&mat_elemental));
1024   } else {
1025     *newmat = mat_elemental;
1026   }
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1031 {
1032   Mat               mat_elemental;
1033   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1034   const PetscInt    *cols;
1035   const PetscScalar *vals;
1036 
1037   PetscFunctionBegin;
1038   if (reuse == MAT_REUSE_MATRIX) {
1039     mat_elemental = *newmat;
1040     PetscCall(MatZeroEntries(mat_elemental));
1041   } else {
1042     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1043     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N));
1044     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1045     PetscCall(MatSetUp(mat_elemental));
1046   }
1047   PetscCall(MatGetRowUpperTriangular(A));
1048   for (row=rstart; row<rend; row++) {
1049     PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
1050     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1051     PetscCall(MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES));
1052     for (j=0; j<ncols; j++) { /* lower triangular part */
1053       PetscScalar v;
1054       if (cols[j] == row) continue;
1055       v    = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1056       PetscCall(MatSetValues(mat_elemental,1,&cols[j],1,&row,&v,ADD_VALUES));
1057     }
1058     PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
1059   }
1060   PetscCall(MatRestoreRowUpperTriangular(A));
1061   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1062   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1063 
1064   if (reuse == MAT_INPLACE_MATRIX) {
1065     PetscCall(MatHeaderReplace(A,&mat_elemental));
1066   } else {
1067     *newmat = mat_elemental;
1068   }
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 static PetscErrorCode MatDestroy_Elemental(Mat A)
1073 {
1074   Mat_Elemental      *a = (Mat_Elemental*)A->data;
1075   Mat_Elemental_Grid *commgrid;
1076   PetscBool          flg;
1077   MPI_Comm           icomm;
1078 
1079   PetscFunctionBegin;
1080   delete a->emat;
1081   delete a->P;
1082   delete a->Q;
1083 
1084   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1085   PetscCall(PetscCommDuplicate(cxxcomm.comm,&icomm,NULL));
1086   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_Elemental_keyval,(void**)&commgrid,(int*)&flg));
1087   if (--commgrid->grid_refct == 0) {
1088     delete commgrid->grid;
1089     PetscCall(PetscFree(commgrid));
1090     PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Elemental_keyval));
1091   }
1092   PetscCall(PetscCommDestroy(&icomm));
1093   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL));
1094   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1095   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_elemental_mpidense_C",NULL));
1096   PetscCall(PetscFree(A->data));
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 PetscErrorCode MatSetUp_Elemental(Mat A)
1101 {
1102   Mat_Elemental  *a = (Mat_Elemental*)A->data;
1103   MPI_Comm       comm;
1104   PetscMPIInt    rsize,csize;
1105   PetscInt       n;
1106 
1107   PetscFunctionBegin;
1108   PetscCall(PetscLayoutSetUp(A->rmap));
1109   PetscCall(PetscLayoutSetUp(A->cmap));
1110 
1111   /* Check if local row and column sizes are equally distributed.
1112      Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
1113      exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (as would be returned by
1114      PetscSplitOwnership(comm,&n,&N)), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
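  /*
     In practice a caller either passes PETSC_DECIDE for the local sizes in MatSetSizes() or computes them
     with the same rule that is checked below, e.g. (sketch; n is the local and N the global size):

       PetscInt n = PETSC_DECIDE;
       PetscCall(PetscSplitOwnership(comm,&n,&N));
  */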
1115   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1116   n = PETSC_DECIDE;
1117   PetscCall(PetscSplitOwnership(comm,&n,&A->rmap->N));
1118   PetscCheck(n == A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local row size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed",A->rmap->n);
1119 
1120   n = PETSC_DECIDE;
1121   PetscCall(PetscSplitOwnership(comm,&n,&A->cmap->N));
1122   PetscCheck(n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local column size %" PetscInt_FMT " of ELEMENTAL matrix must be equally distributed",A->cmap->n);
1123 
1124   a->emat->Resize(A->rmap->N,A->cmap->N);
1125   El::Zero(*a->emat);
1126 
1127   PetscCallMPI(MPI_Comm_size(A->rmap->comm,&rsize));
1128   PetscCallMPI(MPI_Comm_size(A->cmap->comm,&csize));
1129   PetscCheck(csize == rsize,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Cannot use row and column communicators of different sizes");
1130   a->commsize = rsize;
1131   a->mr[0] = A->rmap->N % rsize; if (!a->mr[0]) a->mr[0] = rsize;
1132   a->mr[1] = A->cmap->N % csize; if (!a->mr[1]) a->mr[1] = csize;
1133   a->m[0]  = A->rmap->N / rsize + (a->mr[0] != rsize);
1134   a->m[1]  = A->cmap->N / csize + (a->mr[1] != csize);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType type)
1139 {
1140   Mat_Elemental  *a = (Mat_Elemental*)A->data;
1141 
1142   PetscFunctionBegin;
1143   /* printf("Calling ProcessQueues\n"); */
1144   a->emat->ProcessQueues();
1145   /* printf("Finished ProcessQueues\n"); */
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 PetscErrorCode MatAssemblyEnd_Elemental(Mat A, MatAssemblyType type)
1150 {
1151   PetscFunctionBegin;
1152   /* Currently does nothing */
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer)
1157 {
1158   Mat            Adense,Ae;
1159   MPI_Comm       comm;
1160 
1161   PetscFunctionBegin;
1162   PetscCall(PetscObjectGetComm((PetscObject)newMat,&comm));
1163   PetscCall(MatCreate(comm,&Adense));
1164   PetscCall(MatSetType(Adense,MATDENSE));
1165   PetscCall(MatLoad(Adense,viewer));
1166   PetscCall(MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX,&Ae));
1167   PetscCall(MatDestroy(&Adense));
1168   PetscCall(MatHeaderReplace(newMat,&Ae));
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 /* -------------------------------------------------------------------*/
1173 static struct _MatOps MatOps_Values = {
1174        MatSetValues_Elemental,
1175        0,
1176        0,
1177        MatMult_Elemental,
1178 /* 4*/ MatMultAdd_Elemental,
1179        MatMultTranspose_Elemental,
1180        MatMultTransposeAdd_Elemental,
1181        MatSolve_Elemental,
1182        MatSolveAdd_Elemental,
1183        0,
1184 /*10*/ 0,
1185        MatLUFactor_Elemental,
1186        MatCholeskyFactor_Elemental,
1187        0,
1188        MatTranspose_Elemental,
1189 /*15*/ MatGetInfo_Elemental,
1190        0,
1191        MatGetDiagonal_Elemental,
1192        MatDiagonalScale_Elemental,
1193        MatNorm_Elemental,
1194 /*20*/ MatAssemblyBegin_Elemental,
1195        MatAssemblyEnd_Elemental,
1196        MatSetOption_Elemental,
1197        MatZeroEntries_Elemental,
1198 /*24*/ 0,
1199        MatLUFactorSymbolic_Elemental,
1200        MatLUFactorNumeric_Elemental,
1201        MatCholeskyFactorSymbolic_Elemental,
1202        MatCholeskyFactorNumeric_Elemental,
1203 /*29*/ MatSetUp_Elemental,
1204        0,
1205        0,
1206        0,
1207        0,
1208 /*34*/ MatDuplicate_Elemental,
1209        0,
1210        0,
1211        0,
1212        0,
1213 /*39*/ MatAXPY_Elemental,
1214        0,
1215        0,
1216        0,
1217        MatCopy_Elemental,
1218 /*44*/ 0,
1219        MatScale_Elemental,
1220        MatShift_Basic,
1221        0,
1222        0,
1223 /*49*/ 0,
1224        0,
1225        0,
1226        0,
1227        0,
1228 /*54*/ 0,
1229        0,
1230        0,
1231        0,
1232        0,
1233 /*59*/ 0,
1234        MatDestroy_Elemental,
1235        MatView_Elemental,
1236        0,
1237        0,
1238 /*64*/ 0,
1239        0,
1240        0,
1241        0,
1242        0,
1243 /*69*/ 0,
1244        0,
1245        MatConvert_Elemental_Dense,
1246        0,
1247        0,
1248 /*74*/ 0,
1249        0,
1250        0,
1251        0,
1252        0,
1253 /*79*/ 0,
1254        0,
1255        0,
1256        0,
1257        MatLoad_Elemental,
1258 /*84*/ 0,
1259        0,
1260        0,
1261        0,
1262        0,
1263 /*89*/ 0,
1264        0,
1265        MatMatMultNumeric_Elemental,
1266        0,
1267        0,
1268 /*94*/ 0,
1269        0,
1270        0,
1271        MatMatTransposeMultNumeric_Elemental,
1272        0,
1273 /*99*/ MatProductSetFromOptions_Elemental,
1274        0,
1275        0,
1276        MatConjugate_Elemental,
1277        0,
1278 /*104*/0,
1279        0,
1280        0,
1281        0,
1282        0,
1283 /*109*/MatMatSolve_Elemental,
1284        0,
1285        0,
1286        0,
1287        MatMissingDiagonal_Elemental,
1288 /*114*/0,
1289        0,
1290        0,
1291        0,
1292        0,
1293 /*119*/0,
1294        MatHermitianTranspose_Elemental,
1295        0,
1296        0,
1297        0,
1298 /*124*/0,
1299        0,
1300        0,
1301        0,
1302        0,
1303 /*129*/0,
1304        0,
1305        0,
1306        0,
1307        0,
1308 /*134*/0,
1309        0,
1310        0,
1311        0,
1312        0,
1313        0,
1314 /*140*/0,
1315        0,
1316        0,
1317        0,
1318        0,
1319 /*145*/0,
1320        0,
1321        0,
1322        0,
1323        0,
1324 /*150*/0
1325 };
1326 
1327 /*MC
1328    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package
1329 
1330   Use ./configure --download-elemental to install PETSc to use Elemental
1331 
1332   Options Database Keys:
1333 + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1334 . -pc_factor_mat_solver_type elemental - to use this direct solver with the option -pc_type lu
1335 . -mat_elemental_grid_height - sets the grid height of the 2D cyclic ordering of the internal matrix
1336 - -mat_elemental_pivoting <1> - pivoting strategy used by the LU factorization: 0 (none), 1 (keep row permutation P), or 2 (keep permutations P and Q)
1338 
1339   Level: beginner
1340 
1341   Note:
1342    Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1343    range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine which entries are stored on
1344    a given rank.
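
   Example Usage (a minimal sketch; comm, N and the entry (i,j,v) are placeholders, and ADD_VALUES must be
   used for any entry owned by another process):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);
   MatSetType(A,MATELEMENTAL);
   MatSetUp(A);
   MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve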
1345 
1346 .seealso: `MATDENSE`, `MATSCALAPACK`, `MatGetOwnershipIS()`
1347 M*/
1348 
1349 PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A)
1350 {
1351   Mat_Elemental      *a;
1352   PetscBool          flg,flg1;
1353   Mat_Elemental_Grid *commgrid;
1354   MPI_Comm           icomm;
1355   PetscInt           optv1;
1356 
1357   PetscFunctionBegin;
1358   PetscCall(PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps)));
1359   A->insertmode = NOT_SET_VALUES;
1360 
1361   PetscCall(PetscNewLog(A,&a));
1362   A->data = (void*)a;
1363 
1364   /* Set up the elemental matrix */
1365   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1366 
1367   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1368   if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
1369     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_Elemental_keyval,(void*)0));
1370     PetscCall(PetscCitationsRegister(ElementalCitation,&ElementalCite));
1371   }
1372   PetscCall(PetscCommDuplicate(cxxcomm.comm,&icomm,NULL));
1373   PetscCallMPI(MPI_Comm_get_attr(icomm,Petsc_Elemental_keyval,(void**)&commgrid,(int*)&flg));
1374   if (!flg) {
1375     PetscCall(PetscNewLog(A,&commgrid));
1376 
1377     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"Elemental Options","Mat");
1378     /* displayed default grid sizes (CommSize,1) are set by us arbitrarily until El::Grid() is called */
1379     PetscCall(PetscOptionsInt("-mat_elemental_grid_height","Grid Height","None",El::mpi::Size(cxxcomm),&optv1,&flg1));
1380     if (flg1) {
1381       PetscCheck((El::mpi::Size(cxxcomm) % optv1) == 0,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %" PetscInt_FMT " must evenly divide CommSize %" PetscInt_FMT,optv1,(PetscInt)El::mpi::Size(cxxcomm));
1382       commgrid->grid = new El::Grid(cxxcomm,optv1); /* use user-provided grid height */
1383     } else {
1384       commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
1385       /* printf("new commgrid->grid = %p\n",commgrid->grid);  -- memory leak revealed by valgrind? */
1386     }
1387     commgrid->grid_refct = 1;
1388     PetscCallMPI(MPI_Comm_set_attr(icomm,Petsc_Elemental_keyval,(void*)commgrid));
1389 
1390     a->pivoting    = 1;
1391     PetscCall(PetscOptionsInt("-mat_elemental_pivoting","Pivoting","None",a->pivoting,&a->pivoting,NULL));
1392 
1393     PetscOptionsEnd();
1394   } else {
1395     commgrid->grid_refct++;
1396   }
1397   PetscCall(PetscCommDestroy(&icomm));
1398   a->grid        = commgrid->grid;
1399   a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
1400   a->roworiented = PETSC_TRUE;
1401 
1402   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_Elemental));
1403   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatProductSetFromOptions_elemental_mpidense_C",MatProductSetFromOptions_Elemental_MPIDense));
1404   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATELEMENTAL));
1405   PetscFunctionReturn(0);
1406 }
1407