xref: /petsc/src/mat/impls/elemental/matelem.cxx (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1 #include <../src/mat/impls/elemental/matelemimpl.h> /*I "petscmat.h" I*/
2 
3 /*
4     The variable Petsc_Elemental_keyval is used to indicate an MPI attribute that
5   is attached to a communicator, in this case the attribute is a Mat_Elemental_Grid
6 */
7 static PetscMPIInt Petsc_Elemental_keyval = MPI_KEYVAL_INVALID;
8 
9 /*@C
10    PetscElementalInitializePackage - Initialize Elemental package
11 
12    Logically Collective
13 
14    Level: developer
15 
16 .seealso: MATELEMENTAL, PetscElementalFinalizePackage()
17 @*/
18 PetscErrorCode PetscElementalInitializePackage(void)
19 {
20   PetscErrorCode ierr;
21 
22   PetscFunctionBegin;
23   if (El::Initialized()) PetscFunctionReturn(0);
24   El::Initialize();   /* called by the 1st call of MatCreate_Elemental */
25   ierr = PetscRegisterFinalize(PetscElementalFinalizePackage);CHKERRQ(ierr);
26   PetscFunctionReturn(0);
27 }
28 
29 /*@C
30    PetscElementalFinalizePackage - Finalize Elemental package
31 
32    Logically Collective
33 
34    Level: developer
35 
36 .seealso: MATELEMENTAL, PetscElementalInitializePackage()
37 @*/
PetscErrorCode PetscElementalFinalizePackage(void)
{
  PetscFunctionBegin;
  /* Registered with PetscRegisterFinalize() by PetscElementalInitializePackage() */
  El::Finalize();  /* called by PetscFinalize() */
  PetscFunctionReturn(0);
}
44 
45 static PetscErrorCode MatView_Elemental(Mat A,PetscViewer viewer)
46 {
47   PetscErrorCode ierr;
48   Mat_Elemental  *a = (Mat_Elemental*)A->data;
49   PetscBool      iascii;
50 
51   PetscFunctionBegin;
52   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
53   if (iascii) {
54     PetscViewerFormat format;
55     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
56     if (format == PETSC_VIEWER_ASCII_INFO) {
57       /* call elemental viewing function */
58       ierr = PetscViewerASCIIPrintf(viewer,"Elemental run parameters:\n");CHKERRQ(ierr);
59       ierr = PetscViewerASCIIPrintf(viewer,"  allocated entries=%d\n",(*a->emat).AllocatedMemory());CHKERRQ(ierr);
60       ierr = PetscViewerASCIIPrintf(viewer,"  grid height=%d, grid width=%d\n",(*a->emat).Grid().Height(),(*a->emat).Grid().Width());CHKERRQ(ierr);
61       if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
62         /* call elemental viewing function */
63         ierr = PetscPrintf(PetscObjectComm((PetscObject)viewer),"test matview_elemental 2\n");CHKERRQ(ierr);
64       }
65 
66     } else if (format == PETSC_VIEWER_DEFAULT) {
67       ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
68       El::Print( *a->emat, "Elemental matrix (cyclic ordering)" );
69       ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
70       if (A->factortype == MAT_FACTOR_NONE){
71         Mat Adense;
72         ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
73         ierr = MatView(Adense,viewer);CHKERRQ(ierr);
74         ierr = MatDestroy(&Adense);CHKERRQ(ierr);
75       }
76     } else SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Format");
77   } else {
78     /* convert to dense format and call MatView() */
79     Mat Adense;
80     ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
81     ierr = MatView(Adense,viewer);CHKERRQ(ierr);
82     ierr = MatDestroy(&Adense);CHKERRQ(ierr);
83   }
84   PetscFunctionReturn(0);
85 }
86 
/*
  MatGetInfo_Elemental - Report storage statistics for a MATELEMENTAL matrix.

  Since the storage is dense, nz_used is taken equal to nz_allocated
  (Elemental's locally allocated entry count).
*/
static PetscErrorCode MatGetInfo_Elemental(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_Elemental  *a = (Mat_Elemental*)A->data;

  PetscFunctionBegin;
  info->block_size = 1.0;

  if (flag == MAT_LOCAL) {
    info->nz_allocated   = (*a->emat).AllocatedMemory(); /* locally allocated */
    info->nz_used        = info->nz_allocated;
  } else if (flag == MAT_GLOBAL_MAX) {
    /* NOTE(review): MAT_GLOBAL_MAX is not implemented; nz_allocated/nz_used
       are left untouched in this case -- callers should not rely on them */
    //ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
    /* see MatGetInfo_MPIAIJ() for getting global info->nz_allocated! */
    //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_MAX not written yet");
  } else if (flag == MAT_GLOBAL_SUM) {
    /* NOTE(review): reports only the local allocation, not a global sum --
       the reduction below was never enabled; TODO confirm intended semantics */
    //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP," MAT_GLOBAL_SUM not written yet");
    info->nz_allocated   = (*a->emat).AllocatedMemory(); /* locally allocated */
    info->nz_used        = info->nz_allocated; /* assume Elemental does accurate allocation */
    //ierr = MPIU_Allreduce(isend,irecv,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    //PetscPrintf(PETSC_COMM_SELF,"    ... [%d] locally allocated %g\n",rank,info->nz_allocated);
  }

  info->nz_unneeded       = 0.0;
  info->assemblies        = A->num_ass;
  info->mallocs           = 0;
  info->memory            = ((PetscObject)A)->mem;
  info->fill_ratio_given  = 0; /* determined by Elemental */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
118 
119 PetscErrorCode MatSetOption_Elemental(Mat A,MatOption op,PetscBool flg)
120 {
121   Mat_Elemental  *a = (Mat_Elemental*)A->data;
122 
123   PetscFunctionBegin;
124   switch (op) {
125   case MAT_NEW_NONZERO_LOCATIONS:
126   case MAT_NEW_NONZERO_LOCATION_ERR:
127   case MAT_NEW_NONZERO_ALLOCATION_ERR:
128   case MAT_SYMMETRIC:
129   case MAT_SORTED_FULL:
130   case MAT_HERMITIAN:
131     break;
132   case MAT_ROW_ORIENTED:
133     a->roworiented = flg;
134     break;
135   default:
136     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
137   }
138   PetscFunctionReturn(0);
139 }
140 
/*
  MatSetValues_Elemental - Insert or add a logically dense block of values.

  Locally owned entries are set/updated immediately.  Off-process entries are
  supported only with ADD_VALUES: a first pass counts them, a->emat->Reserve()
  sizes Elemental's remote-update queue, and a second pass enqueues them
  (flushed at assembly time).  Negative row/column indices are skipped.
  The two top-level branches are identical except for the memory layout of
  vals: row-major vals[i*nc+j] vs column-major vals[i+j*nr], selected by
  a->roworiented (see MatSetOption_Elemental()).
*/
static PetscErrorCode MatSetValues_Elemental(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_Elemental  *a = (Mat_Elemental*)A->data;
  PetscInt       i,j,rrank,ridx,crank,cidx,erow,ecol,numQueues=0;

  PetscFunctionBegin;
  // TODO: Initialize matrix to all zeros?

  // Count the number of queues from this process
  if (a->roworiented) {
    for (i=0; i<nr; i++) {
      if (rows[i] < 0) continue;
      /* PETSc global index -> (rank,offset) -> Elemental index */
      P2RO(A,0,rows[i],&rrank,&ridx);
      RO2E(A,0,rrank,ridx,&erow);
      if (rrank < 0 || ridx < 0 || erow < 0) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect row translation");
      for (j=0; j<nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A,1,cols[j],&crank,&cidx);
        RO2E(A,1,crank,cidx,&ecol);
        if (crank < 0 || cidx < 0 || ecol < 0) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect col translation");
        if (!a->emat->IsLocal(erow,ecol) ){ /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          if (imode != ADD_VALUES) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES: a->emat->Set(erow,ecol,(PetscElemScalar)vals[i*nc+j]); break;
        case ADD_VALUES: a->emat->Update(erow,ecol,(PetscElemScalar)vals[i*nc+j]); break;
        default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      }
    }

    /* Second pass: enqueue the off-process updates counted above */
    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve( numQueues );
    for (i=0; i<nr; i++) {
      if (rows[i] < 0) continue;
      P2RO(A,0,rows[i],&rrank,&ridx);
      RO2E(A,0,rrank,ridx,&erow);
      for (j=0; j<nc; j++) {
        if (cols[j] < 0) continue;
        P2RO(A,1,cols[j],&crank,&cidx);
        RO2E(A,1,crank,cidx,&ecol);
        if ( !a->emat->IsLocal(erow,ecol) ) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate( erow, ecol, vals[i*nc+j] );
        }
      }
    }
  } else { /* columnoriented */
    for (j=0; j<nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A,1,cols[j],&crank,&cidx);
      RO2E(A,1,crank,cidx,&ecol);
      if (crank < 0 || cidx < 0 || ecol < 0) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect col translation");
      for (i=0; i<nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A,0,rows[i],&rrank,&ridx);
        RO2E(A,0,rrank,ridx,&erow);
        if (rrank < 0 || ridx < 0 || erow < 0) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Incorrect row translation");
        if (!a->emat->IsLocal(erow,ecol) ){ /* off-proc entry */
          /* printf("Will later remotely update (%d,%d)\n",erow,ecol); */
          if (imode != ADD_VALUES) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only ADD_VALUES to off-processor entry is supported");
          ++numQueues;
          continue;
        }
        /* printf("Locally updating (%d,%d)\n",erow,ecol); */
        switch (imode) {
        case INSERT_VALUES: a->emat->Set(erow,ecol,(PetscElemScalar)vals[i+j*nr]); break;
        case ADD_VALUES: a->emat->Update(erow,ecol,(PetscElemScalar)vals[i+j*nr]); break;
        default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      }
    }

    /* Second pass: enqueue the off-process updates counted above */
    /* printf("numQueues=%d\n",numQueues); */
    a->emat->Reserve( numQueues );
    for (j=0; j<nc; j++) {
      if (cols[j] < 0) continue;
      P2RO(A,1,cols[j],&crank,&cidx);
      RO2E(A,1,crank,cidx,&ecol);

      for (i=0; i<nr; i++) {
        if (rows[i] < 0) continue;
        P2RO(A,0,rows[i],&rrank,&ridx);
        RO2E(A,0,rrank,ridx,&erow);
        if ( !a->emat->IsLocal(erow,ecol) ) { /*off-proc entry*/
          /* printf("Queueing remotely update of (%d,%d)\n",erow,ecol); */
          a->emat->QueueUpdate( erow, ecol, vals[i+j*nr] );
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
238 
/*
  MatMult_Elemental - Computes Y = A*X.

  The PETSc vector arrays are wrapped (not copied) as Elemental [VC,STAR]
  distributed matrices via Attach/LockedAttach, then El::Gemv performs the
  product directly into Y's storage.
*/
static PetscErrorCode MatMult_Elemental(Mat A,Vec X,Vec Y)
{
  Mat_Elemental         *a = (Mat_Elemental*)A->data;
  PetscErrorCode        ierr;
  const PetscElemScalar *x;
  PetscElemScalar       *y;
  PetscElemScalar       one = 1,zero = 0;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecGetArray(Y,(PetscScalar **)&y);CHKERRQ(ierr);
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ye;
    xe.LockedAttach(A->cmap->N,1,*a->grid,0,0,x,A->cmap->n); /* read-only view of X */
    ye.Attach(A->rmap->N,1,*a->grid,0,0,y,A->rmap->n);       /* writable view of Y */
    El::Gemv(El::NORMAL,one,*a->emat,xe,zero,ye);            /* ye = 1*A*xe + 0*ye */
  }
  ierr = VecRestoreArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(Y,(PetscScalar **)&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
260 
/*
  MatMultTranspose_Elemental - Computes Y = A^T*X.

  Mirrors MatMult_Elemental() with row/column maps swapped (X has A's row
  layout, Y has A's column layout) and El::TRANSPOSE passed to Gemv.
*/
static PetscErrorCode MatMultTranspose_Elemental(Mat A,Vec X,Vec Y)
{
  Mat_Elemental         *a = (Mat_Elemental*)A->data;
  PetscErrorCode        ierr;
  const PetscElemScalar *x;
  PetscElemScalar       *y;
  PetscElemScalar       one = 1,zero = 0;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecGetArray(Y,(PetscScalar **)&y);CHKERRQ(ierr);
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ye;
    xe.LockedAttach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n); /* read-only view of X */
    ye.Attach(A->cmap->N,1,*a->grid,0,0,y,A->cmap->n);       /* writable view of Y */
    El::Gemv(El::TRANSPOSE,one,*a->emat,xe,zero,ye);         /* ye = 1*A^T*xe + 0*ye */
  }
  ierr = VecRestoreArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(Y,(PetscScalar **)&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
282 
/*
  MatMultAdd_Elemental - Computes Z = A*X + Y.

  Y is first copied into Z (unless they alias), then Gemv accumulates into Z
  with beta = 1, i.e. Z <- A*X + Z.
*/
static PetscErrorCode MatMultAdd_Elemental(Mat A,Vec X,Vec Y,Vec Z)
{
  Mat_Elemental         *a = (Mat_Elemental*)A->data;
  PetscErrorCode        ierr;
  const PetscElemScalar *x;
  PetscElemScalar       *z;
  PetscElemScalar       one = 1;

  PetscFunctionBegin;
  if (Y != Z) {ierr = VecCopy(Y,Z);CHKERRQ(ierr);}
  ierr = VecGetArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecGetArray(Z,(PetscScalar **)&z);CHKERRQ(ierr);
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ze;
    xe.LockedAttach(A->cmap->N,1,*a->grid,0,0,x,A->cmap->n); /* read-only view of X */
    ze.Attach(A->rmap->N,1,*a->grid,0,0,z,A->rmap->n);       /* writable view of Z */
    El::Gemv(El::NORMAL,one,*a->emat,xe,one,ze);             /* ze = 1*A*xe + 1*ze */
  }
  ierr = VecRestoreArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(Z,(PetscScalar **)&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
305 
/*
  MatMultTransposeAdd_Elemental - Computes Z = A^T*X + Y.

  Mirrors MatMultAdd_Elemental() with row/column maps swapped and
  El::TRANSPOSE passed to Gemv.
*/
static PetscErrorCode MatMultTransposeAdd_Elemental(Mat A,Vec X,Vec Y,Vec Z)
{
  Mat_Elemental         *a = (Mat_Elemental*)A->data;
  PetscErrorCode        ierr;
  const PetscElemScalar *x;
  PetscElemScalar       *z;
  PetscElemScalar       one = 1;

  PetscFunctionBegin;
  if (Y != Z) {ierr = VecCopy(Y,Z);CHKERRQ(ierr);}
  ierr = VecGetArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecGetArray(Z,(PetscScalar **)&z);CHKERRQ(ierr);
  { /* Scoping so that constructor is called before pointer is returned */
    El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe, ze;
    xe.LockedAttach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n); /* read-only view of X */
    ze.Attach(A->cmap->N,1,*a->grid,0,0,z,A->cmap->n);       /* writable view of Z */
    El::Gemv(El::TRANSPOSE,one,*a->emat,xe,one,ze);          /* ze = 1*A^T*xe + 1*ze */
  }
  ierr = VecRestoreArrayRead(X,(const PetscScalar **)&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(Z,(PetscScalar **)&z);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
328 
329 static PetscErrorCode MatMatMultNumeric_Elemental(Mat A,Mat B,Mat C)
330 {
331   Mat_Elemental    *a = (Mat_Elemental*)A->data;
332   Mat_Elemental    *b = (Mat_Elemental*)B->data;
333   Mat_Elemental    *c = (Mat_Elemental*)C->data;
334   PetscElemScalar  one = 1,zero = 0;
335 
336   PetscFunctionBegin;
337   { /* Scoping so that constructor is called before pointer is returned */
338     El::Gemm(El::NORMAL,El::NORMAL,one,*a->emat,*b->emat,zero,*c->emat);
339   }
340   C->assembled = PETSC_TRUE;
341   PetscFunctionReturn(0);
342 }
343 
344 static PetscErrorCode MatMatMultSymbolic_Elemental(Mat A,Mat B,PetscReal fill,Mat *C)
345 {
346   PetscErrorCode ierr;
347   Mat            Ce;
348   MPI_Comm       comm;
349 
350   PetscFunctionBegin;
351   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
352   ierr = MatCreate(comm,&Ce);CHKERRQ(ierr);
353   ierr = MatSetSizes(Ce,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
354   ierr = MatSetType(Ce,MATELEMENTAL);CHKERRQ(ierr);
355   ierr = MatSetUp(Ce);CHKERRQ(ierr);
356   *C = Ce;
357   PetscFunctionReturn(0);
358 }
359 
/*
  MatMatMult_Elemental - Driver for C = A*B.

  With MAT_INITIAL_MATRIX the symbolic phase first creates C; with
  MAT_REUSE_MATRIX the existing *C is filled directly by the numeric phase.
  Both phases are wrapped in the standard PETSc logging events.
*/
static PetscErrorCode MatMatMult_Elemental(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_Elemental(A,B,1.0,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
375 
376 static PetscErrorCode MatMatTransposeMultNumeric_Elemental(Mat A,Mat B,Mat C)
377 {
378   Mat_Elemental      *a = (Mat_Elemental*)A->data;
379   Mat_Elemental      *b = (Mat_Elemental*)B->data;
380   Mat_Elemental      *c = (Mat_Elemental*)C->data;
381   PetscElemScalar    one = 1,zero = 0;
382 
383   PetscFunctionBegin;
384   { /* Scoping so that constructor is called before pointer is returned */
385     El::Gemm(El::NORMAL,El::TRANSPOSE,one,*a->emat,*b->emat,zero,*c->emat);
386   }
387   C->assembled = PETSC_TRUE;
388   PetscFunctionReturn(0);
389 }
390 
391 static PetscErrorCode MatMatTransposeMultSymbolic_Elemental(Mat A,Mat B,PetscReal fill,Mat *C)
392 {
393   PetscErrorCode ierr;
394   Mat            Ce;
395   MPI_Comm       comm;
396 
397   PetscFunctionBegin;
398   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
399   ierr = MatCreate(comm,&Ce);CHKERRQ(ierr);
400   ierr = MatSetSizes(Ce,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
401   ierr = MatSetType(Ce,MATELEMENTAL);CHKERRQ(ierr);
402   ierr = MatSetUp(Ce);CHKERRQ(ierr);
403   *C = Ce;
404   PetscFunctionReturn(0);
405 }
406 
407 static PetscErrorCode MatMatTransposeMult_Elemental(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
408 {
409   PetscErrorCode ierr;
410 
411   PetscFunctionBegin;
412   if (scall == MAT_INITIAL_MATRIX){
413     ierr = PetscLogEventBegin(MAT_MatTransposeMultSymbolic,A,B,0,0);CHKERRQ(ierr);
414     ierr = MatMatMultSymbolic_Elemental(A,B,1.0,C);CHKERRQ(ierr);
415     ierr = PetscLogEventEnd(MAT_MatTransposeMultSymbolic,A,B,0,0);CHKERRQ(ierr);
416   }
417   ierr = PetscLogEventBegin(MAT_MatTransposeMultNumeric,A,B,0,0);CHKERRQ(ierr);
418   ierr = MatMatTransposeMultNumeric_Elemental(A,B,*C);CHKERRQ(ierr);
419   ierr = PetscLogEventEnd(MAT_MatTransposeMultNumeric,A,B,0,0);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
/*
  MatGetDiagonal_Elemental - Extract the main diagonal of A into vector D.

  Every diagonal index i in PETSc ordering is translated to Elemental
  coordinates (erow,ecol) and fetched with a->emat->Get(); the values are
  then assembled into D with VecSetValues()/VecAssembly*.
  NOTE(review): this loops over all min(nrows,ncols) entries on each rank --
  presumably Get() is collective across the grid; confirm before changing.
*/
static PetscErrorCode MatGetDiagonal_Elemental(Mat A,Vec D)
{
  PetscInt        i,nrows,ncols,nD,rrank,ridx,crank,cidx;
  Mat_Elemental   *a = (Mat_Elemental*)A->data;
  PetscErrorCode  ierr;
  PetscElemScalar v;
  MPI_Comm        comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetSize(A,&nrows,&ncols);CHKERRQ(ierr);
  nD = nrows>ncols ? ncols : nrows; /* diagonal length of a possibly rectangular matrix */
  for (i=0; i<nD; i++) {
    PetscInt erow,ecol;
    /* PETSc global index -> (rank,offset) -> Elemental index, for row and column */
    P2RO(A,0,i,&rrank,&ridx);
    RO2E(A,0,rrank,ridx,&erow);
    if (rrank < 0 || ridx < 0 || erow < 0) SETERRQ(comm,PETSC_ERR_PLIB,"Incorrect row translation");
    P2RO(A,1,i,&crank,&cidx);
    RO2E(A,1,crank,cidx,&ecol);
    if (crank < 0 || cidx < 0 || ecol < 0) SETERRQ(comm,PETSC_ERR_PLIB,"Incorrect col translation");
    v = a->emat->Get(erow,ecol);
    ierr = VecSetValues(D,1,&i,(PetscScalar*)&v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
450 
/*
  MatDiagonalScale_Elemental - Computes X = diag(L)*X*diag(R).

  Either L or R may be NULL to skip that side.  Each vector's array is
  wrapped (not copied) as an Elemental [VC,STAR] distributed matrix and
  applied with El::DiagonalScale from the right, then the left.
*/
static PetscErrorCode MatDiagonalScale_Elemental(Mat X,Vec L,Vec R)
{
  Mat_Elemental         *x = (Mat_Elemental*)X->data;
  const PetscElemScalar *d;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  if (R) {
    /* X <- X*diag(R); R has X's column layout */
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    El::DistMatrix<PetscElemScalar,El::VC,El::STAR> de;
    de.LockedAttach(X->cmap->N,1,*x->grid,0,0,d,X->cmap->n);
    El::DiagonalScale(El::RIGHT,El::NORMAL,de,*x->emat);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {
    /* X <- diag(L)*X; L has X's row layout */
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    El::DistMatrix<PetscElemScalar,El::VC,El::STAR> de;
    de.LockedAttach(X->rmap->N,1,*x->grid,0,0,d,X->rmap->n);
    El::DiagonalScale(El::LEFT,El::NORMAL,de,*x->emat);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
474 
/*
  MatMissingDiagonal_Elemental - Dense storage always has every diagonal
  entry, so *missing is always PETSC_FALSE and *d is left untouched.
*/
static PetscErrorCode MatMissingDiagonal_Elemental(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
481 
482 static PetscErrorCode MatScale_Elemental(Mat X,PetscScalar a)
483 {
484   Mat_Elemental  *x = (Mat_Elemental*)X->data;
485 
486   PetscFunctionBegin;
487   El::Scale((PetscElemScalar)a,*x->emat);
488   PetscFunctionReturn(0);
489 }
490 
491 /*
492   MatAXPY - Computes Y = a*X + Y.
493 */
494 static PetscErrorCode MatAXPY_Elemental(Mat Y,PetscScalar a,Mat X,MatStructure str)
495 {
496   Mat_Elemental  *x = (Mat_Elemental*)X->data;
497   Mat_Elemental  *y = (Mat_Elemental*)Y->data;
498   PetscErrorCode ierr;
499 
500   PetscFunctionBegin;
501   El::Axpy((PetscElemScalar)a,*x->emat,*y->emat);
502   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 static PetscErrorCode MatCopy_Elemental(Mat A,Mat B,MatStructure str)
507 {
508   Mat_Elemental *a=(Mat_Elemental*)A->data;
509   Mat_Elemental *b=(Mat_Elemental*)B->data;
510   PetscErrorCode ierr;
511 
512   PetscFunctionBegin;
513   El::Copy(*a->emat,*b->emat);
514   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 static PetscErrorCode MatDuplicate_Elemental(Mat A,MatDuplicateOption op,Mat *B)
519 {
520   Mat            Be;
521   MPI_Comm       comm;
522   Mat_Elemental  *a=(Mat_Elemental*)A->data;
523   PetscErrorCode ierr;
524 
525   PetscFunctionBegin;
526   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
527   ierr = MatCreate(comm,&Be);CHKERRQ(ierr);
528   ierr = MatSetSizes(Be,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
529   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
530   ierr = MatSetUp(Be);CHKERRQ(ierr);
531   *B = Be;
532   if (op == MAT_COPY_VALUES) {
533     Mat_Elemental *b=(Mat_Elemental*)Be->data;
534     El::Copy(*a->emat,*b->emat);
535   }
536   Be->assembled = PETSC_TRUE;
537   PetscFunctionReturn(0);
538 }
539 
/*
  MatTranspose_Elemental - Computes B = A^T (out-of-place only).

  With MAT_INITIAL_MATRIX a new MATELEMENTAL of transposed layout is
  created; with MAT_REUSE_MATRIX the existing *B is overwritten.
  MAT_INPLACE_MATRIX is rejected.
*/
static PetscErrorCode MatTranspose_Elemental(Mat A,MatReuse reuse,Mat *B)
{
  Mat            Be = *B;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  Mat_Elemental  *a = (Mat_Elemental*)A->data, *b;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* Only out-of-place supported */
  if (reuse == MAT_INPLACE_MATRIX) SETERRQ(comm,PETSC_ERR_SUP,"Only out-of-place supported");
  if (reuse == MAT_INITIAL_MATRIX) {
    /* B gets A's column layout for rows and row layout for columns */
    ierr = MatCreate(comm,&Be);CHKERRQ(ierr);
    ierr = MatSetSizes(Be,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(Be);CHKERRQ(ierr);
    *B = Be;
  }
  b = (Mat_Elemental*)Be->data;
  El::Transpose(*a->emat,*b->emat);
  Be->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
563 
564 static PetscErrorCode MatConjugate_Elemental(Mat A)
565 {
566   Mat_Elemental  *a = (Mat_Elemental*)A->data;
567 
568   PetscFunctionBegin;
569   El::Conjugate(*a->emat);
570   PetscFunctionReturn(0);
571 }
572 
573 static PetscErrorCode MatHermitianTranspose_Elemental(Mat A,MatReuse reuse,Mat *B)
574 {
575   Mat            Be = *B;
576   PetscErrorCode ierr;
577   MPI_Comm       comm;
578   Mat_Elemental  *a = (Mat_Elemental*)A->data, *b;
579 
580   PetscFunctionBegin;
581   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
582   /* Only out-of-place supported */
583   if (reuse == MAT_INITIAL_MATRIX){
584     ierr = MatCreate(comm,&Be);CHKERRQ(ierr);
585     ierr = MatSetSizes(Be,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
586     ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
587     ierr = MatSetUp(Be);CHKERRQ(ierr);
588     *B = Be;
589   }
590   b = (Mat_Elemental*)Be->data;
591   El::Adjoint(*a->emat,*b->emat);
592   Be->assembled = PETSC_TRUE;
593   PetscFunctionReturn(0);
594 }
595 
/*
  MatSolve_Elemental - Solve A*X = B using a previously computed LU or
  Cholesky factorization stored in a->emat.

  B is first copied into X; X's array is wrapped as an Elemental [VC,STAR]
  matrix, redistributed to [MC,MR] for the triangular solves, and the
  result is copied back into X's storage.  For LU, a->pivoting selects
  no pivoting (0), partial pivoting with permutation P (1), or full
  pivoting with P and Q (2).
*/
static PetscErrorCode MatSolve_Elemental(Mat A,Vec B,Vec X)
{
  Mat_Elemental     *a = (Mat_Elemental*)A->data;
  PetscErrorCode    ierr;
  PetscElemScalar   *x;
  PetscInt          pivoting = a->pivoting;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr);
  ierr = VecGetArray(X,(PetscScalar **)&x);CHKERRQ(ierr);

  El::DistMatrix<PetscElemScalar,El::VC,El::STAR> xe;
  xe.Attach(A->rmap->N,1,*a->grid,0,0,x,A->rmap->n);
  El::DistMatrix<PetscElemScalar,El::MC,El::MR> xer(xe); /* redistribute for SolveAfter */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL,*a->emat,xer);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,xer);
    } else { /* pivoting == 2 */
      El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*a->Q,xer);
    }
    break;
  case MAT_FACTOR_CHOLESKY:
    El::cholesky::SolveAfter(El::UPPER,El::NORMAL,*a->emat,xer);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
    break;
  }
  El::Copy(xer,xe); /* copy the solution back into X's storage */

  ierr = VecRestoreArray(X,(PetscScalar **)&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
632 
/*
  MatSolveAdd_Elemental - Computes X = inv(A)*B + Y: solve into X, then
  accumulate Y with VecAXPY.
*/
static PetscErrorCode MatSolveAdd_Elemental(Mat A,Vec B,Vec Y,Vec X)
{
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = MatSolve_Elemental(A,B,X);CHKERRQ(ierr);
  ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
642 
/*
  MatMatSolve_Elemental - Solve A*X = B for a matrix of right-hand sides.

  B is copied into X and the appropriate Elemental SolveAfter routine is
  applied in place, matching the pivoting conventions of MatSolve_Elemental().
*/
static PetscErrorCode MatMatSolve_Elemental(Mat A,Mat B,Mat X)
{
  Mat_Elemental *a=(Mat_Elemental*)A->data;
  Mat_Elemental *b=(Mat_Elemental*)B->data;
  Mat_Elemental *x=(Mat_Elemental*)X->data;
  PetscInt      pivoting = a->pivoting;

  PetscFunctionBegin;
  El::Copy(*b->emat,*x->emat);
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    if (pivoting == 0) {
      El::lu::SolveAfter(El::NORMAL,*a->emat,*x->emat);
    } else if (pivoting == 1) {
      El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*x->emat);
    } else {
      El::lu::SolveAfter(El::NORMAL,*a->emat,*a->P,*a->Q,*x->emat);
    }
    break;
  case MAT_FACTOR_CHOLESKY:
    El::cholesky::SolveAfter(El::UPPER,El::NORMAL,*a->emat,*x->emat);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
    break;
  }
  PetscFunctionReturn(0);
}
671 
/*
  MatLUFactor_Elemental - In-place LU factorization of A via Elemental.

  a->pivoting selects no pivoting (0), partial pivoting producing P (1),
  or full pivoting producing P and Q (otherwise).  The row/col IS
  arguments are ignored; Elemental computes its own permutations.
*/
static PetscErrorCode MatLUFactor_Elemental(Mat A,IS row,IS col,const MatFactorInfo *info)
{
  Mat_Elemental  *a = (Mat_Elemental*)A->data;
  PetscErrorCode ierr;
  PetscInt       pivoting = a->pivoting;

  PetscFunctionBegin;
  if (pivoting == 0) {
    El::LU(*a->emat);
  } else if (pivoting == 1) {
    El::LU(*a->emat,*a->P);
  } else {
    El::LU(*a->emat,*a->P,*a->Q);
  }
  A->factortype = MAT_FACTOR_LU;
  A->assembled  = PETSC_TRUE;

  /* record which solver package produced this factorization */
  ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERELEMENTAL,&A->solvertype);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
693 
/*
  MatLUFactorNumeric_Elemental - Numeric LU: copy A into the factor matrix
  F, then factor F in place with MatLUFactor_Elemental().
*/
static PetscErrorCode  MatLUFactorNumeric_Elemental(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatLUFactor_Elemental(F,0,0,info);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
703 
static PetscErrorCode  MatLUFactorSymbolic_Elemental(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is already created and allocated by MatGetFactor_elemental_elemental();
     dense storage needs no symbolic phase, so there is nothing to do here. */
  PetscFunctionReturn(0);
}
710 
711 static PetscErrorCode MatCholeskyFactor_Elemental(Mat A,IS perm,const MatFactorInfo *info)
712 {
713   Mat_Elemental  *a = (Mat_Elemental*)A->data;
714   El::DistMatrix<PetscElemScalar,El::MC,El::STAR> d;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   El::Cholesky(El::UPPER,*a->emat);
719   A->factortype = MAT_FACTOR_CHOLESKY;
720   A->assembled  = PETSC_TRUE;
721 
722   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
723   ierr = PetscStrallocpy(MATSOLVERELEMENTAL,&A->solvertype);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
/*
  MatCholeskyFactorNumeric_Elemental - Numeric Cholesky: copy A into the
  factor matrix F, then factor F in place with MatCholeskyFactor_Elemental().
*/
static PetscErrorCode MatCholeskyFactorNumeric_Elemental(Mat F,Mat A,const MatFactorInfo *info)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatCholeskyFactor_Elemental(F,0,info);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
736 
static PetscErrorCode MatCholeskyFactorSymbolic_Elemental(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is already created and allocated by MatGetFactor_elemental_elemental();
     dense storage needs no symbolic phase, so there is nothing to do here. */
  PetscFunctionReturn(0);
}
743 
/*
  MatFactorGetSolverType_elemental_elemental - Reports MATSOLVERELEMENTAL
  as the solver package; composed on factor matrices as
  "MatFactorGetSolverType_C" by MatGetFactor_elemental_elemental().
*/
PetscErrorCode MatFactorGetSolverType_elemental_elemental(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERELEMENTAL;
  PetscFunctionReturn(0);
}
750 
/*
  MatGetFactor_elemental_elemental - Create the factor matrix F for a
  MATELEMENTAL matrix: same layout as A, tagged with the requested factor
  type and the MATSOLVERELEMENTAL solver name.  Registered for
  MAT_FACTOR_LU and MAT_FACTOR_CHOLESKY by MatSolverTypeRegister_Elemental().
*/
static PetscErrorCode MatGetFactor_elemental_elemental(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);
  B->factortype = ftype;
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERELEMENTAL,&B->solvertype);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_elemental_elemental);CHKERRQ(ierr);
  *F            = B;
  PetscFunctionReturn(0);
}
770 
771 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_Elemental(void)
772 {
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   ierr = MatSolverTypeRegister(MATSOLVERELEMENTAL,MATELEMENTAL,        MAT_FACTOR_LU,MatGetFactor_elemental_elemental);CHKERRQ(ierr);
777   ierr = MatSolverTypeRegister(MATSOLVERELEMENTAL,MATELEMENTAL,        MAT_FACTOR_CHOLESKY,MatGetFactor_elemental_elemental);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 static PetscErrorCode MatNorm_Elemental(Mat A,NormType type,PetscReal *nrm)
782 {
783   Mat_Elemental *a=(Mat_Elemental*)A->data;
784 
785   PetscFunctionBegin;
786   switch (type){
787   case NORM_1:
788     *nrm = El::OneNorm(*a->emat);
789     break;
790   case NORM_FROBENIUS:
791     *nrm = El::FrobeniusNorm(*a->emat);
792     break;
793   case NORM_INFINITY:
794     *nrm = El::InfinityNorm(*a->emat);
795     break;
796   default:
797     printf("Error: unsupported norm type!\n");
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 static PetscErrorCode MatZeroEntries_Elemental(Mat A)
803 {
804   Mat_Elemental *a=(Mat_Elemental*)A->data;
805 
806   PetscFunctionBegin;
807   El::Zero(*a->emat);
808   PetscFunctionReturn(0);
809 }
810 
/*
  MatGetOwnershipIS_Elemental - Build index sets of the global PETSc row
  and/or column indices stored locally by this rank's piece of the
  Elemental distribution.  Either output may be NULL to skip it.

  Local Elemental indices follow shift + i*stride (Col/RowShift and
  Col/RowStride of the distribution); each is mapped back to a PETSc
  global index via E2RO()/RO2P().  The malloc'd arrays are handed to the
  IS with PETSC_OWN_POINTER, so they must not be freed here.
*/
static PetscErrorCode MatGetOwnershipIS_Elemental(Mat A,IS *rows,IS *cols)
{
  Mat_Elemental  *a = (Mat_Elemental*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,m,shift,stride,*idx;

  PetscFunctionBegin;
  if (rows) {
    m = a->emat->LocalHeight();
    shift = a->emat->ColShift();
    stride = a->emat->ColStride();
    ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      PetscInt rank,offset;
      /* Elemental index -> (rank,offset) -> PETSc global index */
      E2RO(A,0,shift+i*stride,&rank,&offset);
      RO2P(A,0,rank,offset,&idx[i]);
    }
    ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
  }
  if (cols) {
    m = a->emat->LocalWidth();
    shift = a->emat->RowShift();
    stride = a->emat->RowStride();
    ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      PetscInt rank,offset;
      /* Elemental index -> (rank,offset) -> PETSc global index */
      E2RO(A,1,shift+i*stride,&rank,&offset);
      RO2P(A,1,rank,offset,&idx[i]);
    }
    ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
844 
/* Convert a MATELEMENTAL matrix to MATDENSE by copying every locally owned entry.
   For each local entry the index chain is: PETSc global -> (rank,offset) -> Elemental
   global (P2RO/RO2E), then the Elemental-local position is obtained by dividing the
   Elemental-global index by the corresponding process-grid dimension.
   NOTE(review): that division assumes the default element-cyclic [MC,MR] distribution
   created in MatCreate_Elemental — confirm before reusing with other distributions. */
static PetscErrorCode MatConvert_Elemental_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  Mat                Bmpi;
  Mat_Elemental      *a = (Mat_Elemental*)A->data;
  MPI_Comm           comm;
  PetscErrorCode     ierr;
  IS                 isrows,iscols;
  PetscInt           rrank,ridx,crank,cidx,nrows,ncols,i,j,erow,ecol,elrow,elcol;
  const PetscInt     *rows,*cols;
  PetscElemScalar    v;
  const El::Grid     &grid = a->emat->Grid();

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    Bmpi = *B;
  } else {
    /* local sizes mirror A so MatSetValues of locally owned entries mostly stays on-process */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }

  /* Get local entries of A */
  ierr = MatGetOwnershipIS(A,&isrows,&iscols);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrows,&nrows);CHKERRQ(ierr);
  ierr = ISGetIndices(isrows,&rows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscols,&ncols);CHKERRQ(ierr);
  ierr = ISGetIndices(iscols,&cols);CHKERRQ(ierr);

  /* The two branches below are identical except for the loop nesting order,
     chosen to match the insertion orientation recorded in a->roworiented */
  if (a->roworiented) {
    for (i=0; i<nrows; i++) {
      P2RO(A,0,rows[i],&rrank,&ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
      RO2E(A,0,rrank,ridx,&erow);
      if (rrank < 0 || ridx < 0 || erow < 0) SETERRQ(comm,PETSC_ERR_PLIB,"Incorrect row translation");
      for (j=0; j<ncols; j++) {
        P2RO(A,1,cols[j],&crank,&cidx);
        RO2E(A,1,crank,cidx,&ecol);
        if (crank < 0 || cidx < 0 || ecol < 0) SETERRQ(comm,PETSC_ERR_PLIB,"Incorrect col translation");

        elrow = erow / grid.MCSize(); /* Elemental local row index */
        elcol = ecol / grid.MRSize(); /* Elemental local column index */
        v = a->emat->GetLocal(elrow,elcol);
        ierr = MatSetValues(Bmpi,1,&rows[i],1,&cols[j],(PetscScalar *)&v,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  } else { /* column-oriented */
    for (j=0; j<ncols; j++) {
      P2RO(A,1,cols[j],&crank,&cidx);
      RO2E(A,1,crank,cidx,&ecol);
      if (crank < 0 || cidx < 0 || ecol < 0) SETERRQ(comm,PETSC_ERR_PLIB,"Incorrect col translation");
      for (i=0; i<nrows; i++) {
        P2RO(A,0,rows[i],&rrank,&ridx); /* convert indices between PETSc <-> (Rank,Offset) <-> Elemental */
        RO2E(A,0,rrank,ridx,&erow);
        if (rrank < 0 || ridx < 0 || erow < 0) SETERRQ(comm,PETSC_ERR_PLIB,"Incorrect row translation");

        elrow = erow / grid.MCSize(); /* Elemental local row index */
        elcol = ecol / grid.MRSize(); /* Elemental local column index */
        v = a->emat->GetLocal(elrow,elcol);
        ierr = MatSetValues(Bmpi,1,&rows[i],1,&cols[j],(PetscScalar *)&v,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A in place, keeping the caller's Mat handle valid */
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else {
    *B = Bmpi;
  }
  ierr = ISDestroy(&isrows);CHKERRQ(ierr);
  ierr = ISDestroy(&iscols);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
920 
921 PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
922 {
923   Mat               mat_elemental;
924   PetscErrorCode    ierr;
925   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols;
926   const PetscInt    *cols;
927   const PetscScalar *vals;
928 
929   PetscFunctionBegin;
930   if (reuse == MAT_REUSE_MATRIX) {
931     mat_elemental = *newmat;
932     ierr = MatZeroEntries(mat_elemental);CHKERRQ(ierr);
933   } else {
934     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
935     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
936     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
937     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
938   }
939   for (row=0; row<M; row++) {
940     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
941     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
942     ierr = MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
943     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
944   }
945   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
946   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
947 
948   if (reuse == MAT_INPLACE_MATRIX) {
949     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
950   } else {
951     *newmat = mat_elemental;
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
957 {
958   Mat               mat_elemental;
959   PetscErrorCode    ierr;
960   PetscInt          row,ncols,rstart=A->rmap->rstart,rend=A->rmap->rend,j;
961   const PetscInt    *cols;
962   const PetscScalar *vals;
963 
964   PetscFunctionBegin;
965   if (reuse == MAT_REUSE_MATRIX) {
966     mat_elemental = *newmat;
967     ierr = MatZeroEntries(mat_elemental);CHKERRQ(ierr);
968   } else {
969     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
970     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
971     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
972     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
973   }
974   for (row=rstart; row<rend; row++) {
975     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
976     for (j=0; j<ncols; j++) {
977       /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
978       ierr = MatSetValues(mat_elemental,1,&row,1,&cols[j],&vals[j],ADD_VALUES);CHKERRQ(ierr);
979     }
980     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
981   }
982   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
983   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
984 
985   if (reuse == MAT_INPLACE_MATRIX) {
986     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
987   } else {
988     *newmat = mat_elemental;
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 PETSC_INTERN PetscErrorCode MatConvert_SeqSBAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
994 {
995   Mat               mat_elemental;
996   PetscErrorCode    ierr;
997   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols,j;
998   const PetscInt    *cols;
999   const PetscScalar *vals;
1000 
1001   PetscFunctionBegin;
1002   if (reuse == MAT_REUSE_MATRIX) {
1003     mat_elemental = *newmat;
1004     ierr = MatZeroEntries(mat_elemental);CHKERRQ(ierr);
1005   } else {
1006     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1007     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1008     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1009     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1010   }
1011   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1012   for (row=0; row<M; row++) {
1013     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1014     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1015     ierr = MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1016     for (j=0; j<ncols; j++) { /* lower triangular part */
1017       PetscScalar v;
1018       if (cols[j] == row) continue;
1019       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1020       ierr = MatSetValues(mat_elemental,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1021     }
1022     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1023   }
1024   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1025   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1026   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1027 
1028   if (reuse == MAT_INPLACE_MATRIX) {
1029     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1030   } else {
1031     *newmat = mat_elemental;
1032   }
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1037 {
1038   Mat               mat_elemental;
1039   PetscErrorCode    ierr;
1040   PetscInt          M=A->rmap->N,N=A->cmap->N,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1041   const PetscInt    *cols;
1042   const PetscScalar *vals;
1043 
1044   PetscFunctionBegin;
1045   if (reuse == MAT_REUSE_MATRIX) {
1046     mat_elemental = *newmat;
1047     ierr = MatZeroEntries(mat_elemental);CHKERRQ(ierr);
1048   } else {
1049     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1050     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1051     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1052     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1053   }
1054   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1055   for (row=rstart; row<rend; row++) {
1056     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1057     /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1058     ierr = MatSetValues(mat_elemental,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
1059     for (j=0; j<ncols; j++) { /* lower triangular part */
1060       PetscScalar v;
1061       if (cols[j] == row) continue;
1062       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1063       ierr = MatSetValues(mat_elemental,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
1064     }
1065     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1066   }
1067   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1068   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1069   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1070 
1071   if (reuse == MAT_INPLACE_MATRIX) {
1072     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1073   } else {
1074     *newmat = mat_elemental;
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 static PetscErrorCode MatDestroy_Elemental(Mat A)
1080 {
1081   Mat_Elemental      *a = (Mat_Elemental*)A->data;
1082   PetscErrorCode     ierr;
1083   Mat_Elemental_Grid *commgrid;
1084   PetscBool          flg;
1085   MPI_Comm           icomm;
1086 
1087   PetscFunctionBegin;
1088   delete a->emat;
1089   delete a->P;
1090   delete a->Q;
1091 
1092   El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));
1093   ierr = PetscCommDuplicate(cxxcomm.comm,&icomm,NULL);CHKERRQ(ierr);
1094   ierr = MPI_Comm_get_attr(icomm,Petsc_Elemental_keyval,(void**)&commgrid,(int*)&flg);CHKERRQ(ierr);
1095   if (--commgrid->grid_refct == 0) {
1096     delete commgrid->grid;
1097     ierr = PetscFree(commgrid);CHKERRQ(ierr);
1098     ierr = MPI_Comm_free_keyval(&Petsc_Elemental_keyval);CHKERRQ(ierr);
1099   }
1100   ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1101   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1102   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1103   ierr = PetscFree(A->data);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatSetUp_Elemental(Mat A)
1108 {
1109   Mat_Elemental  *a = (Mat_Elemental*)A->data;
1110   PetscErrorCode ierr;
1111   MPI_Comm       comm;
1112   PetscMPIInt    rsize,csize;
1113   PetscInt       n;
1114 
1115   PetscFunctionBegin;
1116   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1117   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1118 
1119   /* Check if local row and clomun sizes are equally distributed.
1120      Jed: Elemental uses "element" cyclic ordering so the sizes need to match that
1121      exactly.  The strategy in MatElemental is for PETSc to implicitly permute to block ordering (like would be returned by
1122      PetscSplitOwnership(comm,&n,&N)), at which point Elemental matrices can act on PETSc vectors without redistributing the vectors. */
1123   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1124   n = PETSC_DECIDE;
1125   ierr = PetscSplitOwnership(comm,&n,&A->rmap->N);CHKERRQ(ierr);
1126   if (n != A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local row size %D of ELEMENTAL matrix must be equally distributed",A->rmap->n);
1127 
1128   n = PETSC_DECIDE;
1129   ierr = PetscSplitOwnership(comm,&n,&A->cmap->N);CHKERRQ(ierr);
1130   if (n != A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Local column size %D of ELEMENTAL matrix must be equally distributed",A->cmap->n);
1131 
1132   a->emat->Resize(A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1133   El::Zero(*a->emat);
1134 
1135   ierr = MPI_Comm_size(A->rmap->comm,&rsize);CHKERRQ(ierr);
1136   ierr = MPI_Comm_size(A->cmap->comm,&csize);CHKERRQ(ierr);
1137   if (csize != rsize) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Cannot use row and column communicators of different sizes");
1138   a->commsize = rsize;
1139   a->mr[0] = A->rmap->N % rsize; if (!a->mr[0]) a->mr[0] = rsize;
1140   a->mr[1] = A->cmap->N % csize; if (!a->mr[1]) a->mr[1] = csize;
1141   a->m[0]  = A->rmap->N / rsize + (a->mr[0] != rsize);
1142   a->m[1]  = A->cmap->N / csize + (a->mr[1] != csize);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 PetscErrorCode MatAssemblyBegin_Elemental(Mat A, MatAssemblyType type)
1147 {
1148   Mat_Elemental  *a = (Mat_Elemental*)A->data;
1149 
1150   PetscFunctionBegin;
1151   /* printf("Calling ProcessQueues\n"); */
1152   a->emat->ProcessQueues();
1153   /* printf("Finished ProcessQueues\n"); */
1154   PetscFunctionReturn(0);
1155 }
1156 
PetscErrorCode MatAssemblyEnd_Elemental(Mat A, MatAssemblyType type)
{
  PetscFunctionBegin;
  /* Nothing to do: all queued updates are flushed by ProcessQueues() in
     MatAssemblyBegin_Elemental() */
  PetscFunctionReturn(0);
}
1163 
1164 PetscErrorCode MatLoad_Elemental(Mat newMat, PetscViewer viewer)
1165 {
1166   PetscErrorCode ierr;
1167   Mat            Adense,Ae;
1168   MPI_Comm       comm;
1169 
1170   PetscFunctionBegin;
1171   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1172   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1173   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1174   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1175   ierr = MatConvert(Adense, MATELEMENTAL, MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);
1176   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1177   ierr = MatHeaderReplace(newMat,&Ae);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 /* -------------------------------------------------------------------*/
/* Function table for MATELEMENTAL.  Entries are positional: slot indices (see the
   numbered comments) follow the _MatOps layout; a 0 marks an operation this matrix
   type does not implement. */
static struct _MatOps MatOps_Values = {
       MatSetValues_Elemental,
       0,
       0,
       MatMult_Elemental,
/* 4*/ MatMultAdd_Elemental,
       MatMultTranspose_Elemental,
       MatMultTransposeAdd_Elemental,
       MatSolve_Elemental,
       MatSolveAdd_Elemental,
       0,
/*10*/ 0,
       MatLUFactor_Elemental,
       MatCholeskyFactor_Elemental,
       0,
       MatTranspose_Elemental,
/*15*/ MatGetInfo_Elemental,
       0,
       MatGetDiagonal_Elemental,
       MatDiagonalScale_Elemental,
       MatNorm_Elemental,
/*20*/ MatAssemblyBegin_Elemental,
       MatAssemblyEnd_Elemental,
       MatSetOption_Elemental,
       MatZeroEntries_Elemental,
/*24*/ 0,
       MatLUFactorSymbolic_Elemental,
       MatLUFactorNumeric_Elemental,
       MatCholeskyFactorSymbolic_Elemental,
       MatCholeskyFactorNumeric_Elemental,
/*29*/ MatSetUp_Elemental,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_Elemental,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_Elemental,
       0,
       0,
       0,
       MatCopy_Elemental,
/*44*/ 0,
       MatScale_Elemental,
       MatShift_Basic,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_Elemental,
       MatView_Elemental,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_Elemental_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_Elemental,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_Elemental,
       MatMatMultSymbolic_Elemental,
       MatMatMultNumeric_Elemental,
       0,
       0,
/*94*/ 0,
       MatMatTransposeMult_Elemental,
       MatMatTransposeMultSymbolic_Elemental,
       MatMatTransposeMultNumeric_Elemental,
       0,
/*99*/ 0,
       0,
       0,
       MatConjugate_Elemental,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_Elemental,
       0,
       0,
       0,
       MatMissingDiagonal_Elemental,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_Elemental,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0
};
1323 
1324 /*MC
1325    MATELEMENTAL = "elemental" - A matrix type for dense matrices using the Elemental package
1326 
1327   Use ./configure --download-elemental to install PETSc to use Elemental
1328 
1329   Use -pc_type lu -pc_factor_mat_solver_type elemental to use this direct solver
1330 
1331    Options Database Keys:
1332 + -mat_type elemental - sets the matrix type to "elemental" during a call to MatSetFromOptions()
1333 - -mat_elemental_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1334 
1335   Level: beginner
1336 
1337 .seealso: MATDENSE
1338 M*/
1339 
/* Create a MATELEMENTAL matrix: install the function table, then obtain the
   Elemental process grid for this communicator, creating and caching it as an
   MPI attribute the first time so all Mats on the same communicator share one
   grid (reference counted; released in MatDestroy_Elemental). */
PETSC_EXTERN PetscErrorCode MatCreate_Elemental(Mat A)
{
  Mat_Elemental      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_Elemental_Grid *commgrid;
  MPI_Comm           icomm;
  PetscInt           optv1;

  PetscFunctionBegin;
  ierr = PetscElementalInitializePackage();CHKERRQ(ierr);
  ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->insertmode = NOT_SET_VALUES;

  ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
  A->data = (void*)a;

  /* Set up the elemental matrix */
  El::mpi::Comm cxxcomm(PetscObjectComm((PetscObject)A));

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_Elemental_keyval == MPI_KEYVAL_INVALID) {
    /* lazily create the process-global keyval used to cache the grid on communicators */
    ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_Elemental_keyval,(void*)0);CHKERRQ(ierr);
  }
  /* use PETSc's inner (duplicated) communicator so the attribute is shared by all PETSc objects on this comm */
  ierr = PetscCommDuplicate(cxxcomm.comm,&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_Elemental_keyval,(void**)&commgrid,(int*)&flg);CHKERRQ(ierr);
  if (!flg) {
    /* first Elemental Mat on this communicator: create and cache the grid */
    ierr = PetscNewLog(A,&commgrid);CHKERRQ(ierr);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"Elemental Options","Mat");CHKERRQ(ierr);
    /* displayed default grid sizes (CommSize,1) are set by us arbitrarily until El::Grid() is called */
    ierr = PetscOptionsInt("-mat_elemental_grid_height","Grid Height","None",El::mpi::Size(cxxcomm),&optv1,&flg1);CHKERRQ(ierr);
    if (flg1) {
      if (El::mpi::Size(cxxcomm) % optv1 != 0) {
        SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,(PetscInt)El::mpi::Size(cxxcomm));
      }
      commgrid->grid = new El::Grid(cxxcomm,optv1); /* use user-provided grid height */
    } else {
      commgrid->grid = new El::Grid(cxxcomm); /* use Elemental default grid sizes */
      /* printf("new commgrid->grid = %p\n",commgrid->grid);  -- memory leak revealed by valgrind? */
    }
    commgrid->grid_refct = 1;
    ierr = MPI_Comm_set_attr(icomm,Petsc_Elemental_keyval,(void*)commgrid);CHKERRQ(ierr);

    a->pivoting    = 1;
    ierr = PetscOptionsInt("-mat_elemental_pivoting","Pivoting","None",a->pivoting,&a->pivoting,NULL);CHKERRQ(ierr);

    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    /* grid already cached by an earlier Mat on this communicator; just take a reference */
    commgrid->grid_refct++;
  }
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  a->grid        = commgrid->grid;
  a->emat        = new El::DistMatrix<PetscElemScalar>(*a->grid);
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_Elemental);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATELEMENTAL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1400