
/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that represents the operator. For a sequential matrix the matrix itself is returned.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
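
/*
   A minimal usage sketch (illustrative; assumes A is an existing MATMPIDENSE or
   MATSEQDENSE matrix and the usual ierr/CHKERRQ error handling used throughout
   this file):

     Mat      Aloc;
     PetscInt lm,ln;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetSize(Aloc,&lm,&ln);CHKERRQ(ierr);    (local rows by global columns)

   Aloc is a borrowed reference and must not be destroyed by the caller.
*/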

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
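
/*
   Usage sketch (illustrative; assumes a square, assembled MATMPIDENSE matrix A).
   The returned block is kept alive by the composed "DiagonalBlock" object above,
   so the caller does not destroy it:

     Mat B;

     ierr = MatGetDiagonalBlock(A,&B);CHKERRQ(ierr);
*/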

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
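
/*
   Entries destined for rows owned by another process are placed in the stash
   above and communicated during assembly.  A minimal calling sketch (assumes a
   created and preallocated parallel matrix mat; i and j are global indices and
   may refer to any process's rows unless MAT_IGNORE_OFF_PROC_ENTRIES is set):

     PetscInt    i = 0,j = 0;
     PetscScalar v = 1.0;

     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/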

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  Currently supports only submatrices with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
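
/*
   Usage sketch (illustrative; the row IS must match the row partitioning of A,
   as noted above; ncols_local and first_col are placeholders for the caller's
   column selection):

     IS       isrow,iscol;
     Mat      S;
     PetscInt rstart,rend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ncols_local,first_col,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/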

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
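
/*
   Usage sketch (illustrative): each process may list any global rows; the
   sends/receives above route them to their owners.  Passing x and b also fixes
   the right-hand side for the rows that are zeroed:

     PetscInt rows[] = {0,5};

     ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);
*/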

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
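
/*
   The multiply above first scatters all of xx into the sequential work vector
   lvec (the local block stores all N columns, so every process needs the entire
   input vector) and then performs the local dense product.  Calling sketch
   (illustrative; MatCreateVecs gives conforming vectors):

     Vec x,y;

     ierr = MatCreateVecs(mat,&x,&y);CHKERRQ(ierr);
     ierr = MatMult(mat,x,y);CHKERRQ(ierr);
*/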

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D",s2a,s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D",s3a,s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
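
/*
   Usage sketch (illustrative):

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   (new matrix)
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);    (overwrite A)
*/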

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
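
/*
   For NORM_2 the local column norms are squared, summed across processes, and
   square-rooted at the end; NORM_1 sums and NORM_INFINITY maximizes.  Usage
   sketch (illustrative):

     PetscReal *norms;
     PetscInt  N;

     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/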

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when a data pointer is supplied, this assumes the user has
     correctly allocated the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
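
/*
   Typical creation sequence that reaches the routine above (sketch; data may be
   NULL, in which case PETSc allocates the local m-by-N column-major array):

     Mat A;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);

   or, equivalently, a single call to MatCreateDense(comm,m,n,M,N,NULL,&A).
*/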

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
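
/*
   Usage sketch (illustrative): the returned pointer addresses the locally owned
   rows of global column col and must be restored before other matrix operations:

     PetscScalar *colv;

     ierr = MatDenseGetColumn(A,col,&colv);CHKERRQ(ierr);
     (read or write colv[0..m-1], where m is the local row count)
     ierr = MatDenseRestoreColumn(A,&colv);CHKERRQ(ierr);
*/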

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1363 
1364 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1365 {
1366   Mat_MPIDense   *a;
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1371   mat->data = (void*)a;
1372   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1373 
1374   mat->insertmode = NOT_SET_VALUES;
1375   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1376   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1377 
1378   /* build cache for off-process entries generated during assembly */
1379   a->donotstash = PETSC_FALSE;
1380 
1381   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1382 
1383   /* data structures used for matrix-vector multiply */
1384   a->lvec        = 0;
1385   a->Mvctx       = 0;
1386   a->roworiented = PETSC_TRUE;
1387 
1388   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1390   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1391   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1394 #if defined(PETSC_HAVE_ELEMENTAL)
1395   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1396 #endif
1397   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1398   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1399   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1400   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1401 
1402   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1403   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1404   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1405   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1406   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1407   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 /*MC
1412    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1413 
1414    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1415    and MATMPIDENSE otherwise.
1416 
1417    Options Database Keys:
1418 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1419 
1420   Level: beginner
1421 
1422 
1423 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1424 M*/
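
/*
   Usage sketch (illustrative only; M and N stand for global dimensions set
   elsewhere by the caller): create a MATDENSE matrix and let the options
   database override the type via -mat_type.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/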
1425 
1426 /*@C
1427    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1428 
1429    Collective on Mat
1430 
1431    Input Parameters:
1432 +  B - the matrix
1433 -  data - optional location of matrix data.  Set data=NULL for PETSc
1434    to control all matrix memory allocation.
1435 
1436    Notes:
1437    The dense format is fully compatible with standard Fortran 77
1438    storage by columns.
1439 
1440    The data input variable is intended primarily for Fortran programmers
1441    who wish to allocate their own matrix memory space.  Most users should
1442    set data=NULL.
1443 
1444    Level: intermediate
1445 
1446 .keywords: matrix, dense, parallel
1447 
1448 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1449 @*/
1450 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1451 {
1452   PetscErrorCode ierr;
1453 
1454   PetscFunctionBegin;
1455   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1456   PetscFunctionReturn(0);
1457 }
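
/*
   Usage sketch (illustrative only; m and n stand for the caller's local row
   and column sizes): with data=NULL, PETSc allocates the local
   rmap->n x cmap->N block itself.

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
*/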
1458 
1459 /*@
1460    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1461    array provided by the user. This is useful to avoid copying an array
1462    into a matrix.
1463 
1464    Not Collective
1465 
1466    Input Parameters:
1467 +  mat - the matrix
1468 -  array - the array in column major order
1469 
1470    Notes:
1471    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1472    freed when the matrix is destroyed.
1473 
1474    Level: developer
1475 
1476 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1477 
1478 @*/
1479 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1480 {
1481   PetscErrorCode ierr;
1482   PetscFunctionBegin;
1483   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1484   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 /*@
1489    MatDenseResetArray - Resets the matrix array to the array it held before the call to MatDensePlaceArray()
1490 
1491    Not Collective
1492 
1493    Input Parameter:
1494 .  mat - the matrix
1495 
1496    Notes:
1497    You can only call this after a call to MatDensePlaceArray()
1498 
1499    Level: developer
1500 
1501 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1502 
1503 @*/
1504 PetscErrorCode  MatDenseResetArray(Mat mat)
1505 {
1506   PetscErrorCode ierr;
1507   PetscFunctionBegin;
1508   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1509   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
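
/*
   Usage sketch for the place/reset pair (illustrative only; A is an assembled
   MATMPIDENSE matrix): wrap a user-owned column-major array without copying,
   then restore the original storage.

     PetscScalar *myarray;
     PetscInt    mloc,N;
     ierr = MatGetLocalSize(A,&mloc,NULL);CHKERRQ(ierr);
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(mloc*N,&myarray);CHKERRQ(ierr);
     ... fill myarray in column-major order ...
     ierr = MatDensePlaceArray(A,myarray);CHKERRQ(ierr);
     ... use A ...
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
     ierr = PetscFree(myarray);CHKERRQ(ierr);
*/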
1512 
1513 /*@C
1514    MatCreateDense - Creates a parallel matrix in dense format.
1515 
1516    Collective on MPI_Comm
1517 
1518    Input Parameters:
1519 +  comm - MPI communicator
1520 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1521 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1522 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1523 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1524 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1525    to control all matrix memory allocation.
1526 
1527    Output Parameter:
1528 .  A - the matrix
1529 
1530    Notes:
1531    The dense format is fully compatible with standard Fortran 77
1532    storage by columns.
1533 
1534    The data input variable is intended primarily for Fortran programmers
1535    who wish to allocate their own matrix memory space.  Most users should
1536    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1537 
1538    The user MUST specify either the local or global matrix dimensions
1539    (possibly both).
1540 
1541    Level: intermediate
1542 
1543 .keywords: matrix, dense, parallel
1544 
1545 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1546 @*/
1547 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1548 {
1549   PetscErrorCode ierr;
1550   PetscMPIInt    size;
1551 
1552   PetscFunctionBegin;
1553   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1554   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1555   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1556   if (size > 1) {
1557     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1558     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1559     if (data) {  /* user provided data array, so no need to assemble */
1560       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1561       (*A)->assembled = PETSC_TRUE;
1562     }
1563   } else {
1564     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1565     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
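
/*
   Usage sketch (illustrative only; assumes a square matrix, M == N): create a
   distributed dense matrix with PETSc-managed storage, set the locally owned
   diagonal entries, and assemble.

     Mat      A;
     PetscInt i,rstart,rend;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       PetscScalar v = 1.0;
       ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/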
1569 
1570 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1571 {
1572   Mat            mat;
1573   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1574   PetscErrorCode ierr;
1575 
1576   PetscFunctionBegin;
1577   *newmat = 0;
1578   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1579   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1580   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1581   a       = (Mat_MPIDense*)mat->data;
1582 
1583   mat->factortype   = A->factortype;
1584   mat->assembled    = PETSC_TRUE;
1585   mat->preallocated = PETSC_TRUE;
1586 
1587   a->size         = oldmat->size;
1588   a->rank         = oldmat->rank;
1589   mat->insertmode = NOT_SET_VALUES;
1590   a->nvec         = oldmat->nvec;
1591   a->donotstash   = oldmat->donotstash;
1592 
1593   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1594   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1595 
1596   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1597   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1598   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1599 
1600   *newmat = mat;
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1605 {
1606   PetscErrorCode ierr;
1607   PetscMPIInt    rank,size;
1608   const PetscInt *rowners;
1609   PetscInt       i,m,n,nz,j,mMax;
1610   PetscScalar    *array,*vals,*vals_ptr;
1611   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1612 
1613   PetscFunctionBegin;
1614   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1615   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1616 
1617   /* determine ownership of rows and columns */
1618   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1619   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1620 
1621   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1622   if (!a->A) {
1623     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1624   }
1625   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1626   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1627   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1628   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1629   if (!rank) {
1630     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1631 
1632     /* read in my part of the matrix numerical values  */
1633     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1634 
1635     /* insert into matrix by row (this is why we cannot read directly into array) */
1636     vals_ptr = vals;
1637     for (i=0; i<m; i++) {
1638       for (j=0; j<N; j++) {
1639         array[i + j*m] = *vals_ptr++;
1640       }
1641     }
1642 
1643     /* read in the other processors' parts and ship them out */
1644     for (i=1; i<size; i++) {
1645       nz   = (rowners[i+1] - rowners[i])*N;
1646       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1647       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1648     }
1649   } else {
1650     /* receive numeric values */
1651     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1652 
1653     /* receive message of values */
1654     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1655 
1656     /* insert into matrix by row (this is why we cannot read directly into array) */
1657     vals_ptr = vals;
1658     for (i=0; i<m; i++) {
1659       for (j=0; j<N; j++) {
1660         array[i + j*m] = *vals_ptr++;
1661       }
1662     }
1663   }
1664   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1665   ierr = PetscFree(vals);CHKERRQ(ierr);
1666   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1672 {
1673   Mat_MPIDense   *a;
1674   PetscScalar    *vals,*svals;
1675   MPI_Comm       comm;
1676   MPI_Status     status;
1677   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1678   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1679   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1680   PetscInt       i,nz,j,rstart,rend;
1681   int            fd;
1682   PetscBool      isbinary;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1687   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1688 
1689   /* force binary viewer to load .info file if it has not yet done so */
1690   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1691   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1692   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1693   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1694   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1695   if (!rank) {
1696     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1697     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1698   }
1699   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1700   M    = header[1]; N = header[2]; nz = header[3];
1701 
1702   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1703   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1704   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1705 
1706   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1707   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1708 
1709   /*
1710        Handle case where matrix is stored on disk as a dense matrix
1711   */
1712   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1713     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1714     PetscFunctionReturn(0);
1715   }
1716 
1717   /* determine ownership of all rows */
1718   if (newmat->rmap->n < 0) {
1719     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1720   } else {
1721     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1722   }
1723   if (newmat->cmap->n < 0) {
1724     n = PETSC_DECIDE;
1725   } else {
1726     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1727   }
1728 
1729   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1730   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1731   rowners[0] = 0;
1732   for (i=2; i<=size; i++) {
1733     rowners[i] += rowners[i-1];
1734   }
1735   rstart = rowners[rank];
1736   rend   = rowners[rank+1];
1737 
1738   /* distribute row lengths to all processors */
1739   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1740   if (!rank) {
1741     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1742     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1743     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1744     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1745     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1746     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1747   } else {
1748     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1749   }
1750 
1751   if (!rank) {
1752     /* calculate the number of nonzeros on each processor */
1753     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1754     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1755     for (i=0; i<size; i++) {
1756       for (j=rowners[i]; j< rowners[i+1]; j++) {
1757         procsnz[i] += rowlengths[j];
1758       }
1759     }
1760     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1761 
1762     /* determine max buffer needed and allocate it */
1763     maxnz = 0;
1764     for (i=0; i<size; i++) {
1765       maxnz = PetscMax(maxnz,procsnz[i]);
1766     }
1767     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1768 
1769     /* read in my part of the matrix column indices  */
1770     nz   = procsnz[0];
1771     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1772     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1773 
1774     /* read in everyone else's and ship them off */
1775     for (i=1; i<size; i++) {
1776       nz   = procsnz[i];
1777       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1778       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1779     }
1780     ierr = PetscFree(cols);CHKERRQ(ierr);
1781   } else {
1782     /* determine buffer space needed for message */
1783     nz = 0;
1784     for (i=0; i<m; i++) {
1785       nz += ourlens[i];
1786     }
1787     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1788 
1789     /* receive message of column indices */
1790     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1791     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1792     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"received message length does not match expected number of column indices; file may be corrupt");
1793   }
1794 
1795   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1796   a = (Mat_MPIDense*)newmat->data;
1797   if (!a->A) {
1798     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1799   }
1800 
1801   if (!rank) {
1802     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1803 
1804     /* read in my part of the matrix numerical values  */
1805     nz   = procsnz[0];
1806     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1807 
1808     /* insert into matrix */
1809     jj      = rstart;
1810     smycols = mycols;
1811     svals   = vals;
1812     for (i=0; i<m; i++) {
1813       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1814       smycols += ourlens[i];
1815       svals   += ourlens[i];
1816       jj++;
1817     }
1818 
1819     /* read in the other processors' parts and ship them out */
1820     for (i=1; i<size; i++) {
1821       nz   = procsnz[i];
1822       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1823       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1824     }
1825     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1826   } else {
1827     /* receive numeric values */
1828     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1829 
1830     /* receive message of values */
1831     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1832     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1833     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"received message length does not match expected number of values; file may be corrupt");
1834 
1835     /* insert into matrix */
1836     jj      = rstart;
1837     smycols = mycols;
1838     svals   = vals;
1839     for (i=0; i<m; i++) {
1840       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1841       smycols += ourlens[i];
1842       svals   += ourlens[i];
1843       jj++;
1844     }
1845   }
1846   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1847   ierr = PetscFree(vals);CHKERRQ(ierr);
1848   ierr = PetscFree(mycols);CHKERRQ(ierr);
1849   ierr = PetscFree(rowners);CHKERRQ(ierr);
1850 
1851   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1852   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1857 {
1858   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1859   Mat            a,b;
1860   PetscBool      flg;
1861   PetscErrorCode ierr;
1862 
1863   PetscFunctionBegin;
1864   a    = matA->A;
1865   b    = matB->A;
1866   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1867   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1872 {
1873   PetscErrorCode        ierr;
1874   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1875   Mat_TransMatMultDense *atb = a->atbdense;
1876 
1877   PetscFunctionBegin;
1878   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1879   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1880   ierr = PetscFree(atb);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1885 {
1886   PetscErrorCode        ierr;
1887   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1888   Mat_MatTransMultDense *abt = a->abtdense;
1889 
1890   PetscFunctionBegin;
1891   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1892   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1893   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1894   ierr = PetscFree(abt);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 
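/*
   C = A^T * B for MPIDENSE A and B: each process forms its local contribution
   (A_loc)^T * B_loc with a single BLAS GEMM, packs it by the owning process
   of each block of rows of C, and MPI_Reduce_scatter() then sums the
   contributions and delivers to each process its local rows of C.
*/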
1898 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1899 {
1900   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1901   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1902   Mat_TransMatMultDense *atb = c->atbdense;
1903   PetscErrorCode ierr;
1904   MPI_Comm       comm;
1905   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1906   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1907   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1908   PetscScalar    _DOne=1.0,_DZero=0.0;
1909   PetscBLASInt   am,an,bn,aN;
1910   const PetscInt *ranges;
1911 
1912   PetscFunctionBegin;
1913   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1914   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1915   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1916 
1917   /* compute atbarray = aseq^T * bseq */
1918   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1919   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1920   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1921   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1922   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1923 
1924   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1925   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1926 
1927   /* arrange atbarray into sendbuf */
1928   k = 0;
1929   for (proc=0; proc<size; proc++) {
1930     for (j=0; j<cN; j++) {
1931       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1932     }
1933   }
1934   /* sum the atbarray contributions from all processes into the local values of C */
1935   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1936   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1937   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1942 {
1943   PetscErrorCode        ierr;
1944   Mat                   Cdense;
1945   MPI_Comm              comm;
1946   PetscMPIInt           size;
1947   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1948   Mat_MPIDense          *c;
1949   Mat_TransMatMultDense *atb;
1950 
1951   PetscFunctionBegin;
1952   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1953   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1954     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1955   }
1956 
1957   /* create matrix product Cdense */
1958   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1959   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1960   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1961   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1962   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   *C   = Cdense;
1965 
1966   /* create data structure for reuse of Cdense */
1967   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1968   ierr = PetscNew(&atb);CHKERRQ(ierr);
1969   cM = Cdense->rmap->N;
1970   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1971 
1972   c                    = (Mat_MPIDense*)Cdense->data;
1973   c->atbdense          = atb;
1974   atb->destroy         = Cdense->ops->destroy;
1975   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1980 {
1981   PetscErrorCode ierr;
1982 
1983   PetscFunctionBegin;
1984   if (scall == MAT_INITIAL_MATRIX) {
1985     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1986   }
1987   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
1992 {
1993   PetscErrorCode        ierr;
1994   Mat                   Cdense;
1995   MPI_Comm              comm;
1996   PetscMPIInt           i, size;
1997   PetscInt              maxRows, bufsiz;
1998   Mat_MPIDense          *c;
1999   PetscMPIInt           tag;
2000   const char            *algTypes[2] = {"allgatherv","cyclic"};
2001   PetscInt              alg, nalg = 2;
2002   Mat_MatTransMultDense *abt;
2003 
2004   PetscFunctionBegin;
2005   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2006   alg = 0; /* default is allgatherv */
2007   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2008   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2009   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2010   if (A->cmap->N != B->cmap->N) {
2011     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2012   }
2013 
2014   /* create matrix product Cdense */
2015   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2016   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2017   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2018   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2019   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2020   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2021   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2022   *C   = Cdense;
2023 
2024   /* create data structure for reuse of Cdense */
2025   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2026   ierr = PetscNew(&abt);CHKERRQ(ierr);
2027   abt->tag = tag;
2028   abt->alg = alg;
2029   switch (alg) {
2030   case 1:
2031     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2032     bufsiz = A->cmap->N * maxRows;
2033     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2034     break;
2035   default:
2036     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2037     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2038     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2039     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2040     break;
2041   }
2042 
2043   c                    = (Mat_MPIDense*)Cdense->data;
2044   c->abtdense          = abt;
2045   abt->destroy         = Cdense->ops->destroy;
2046   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2047   PetscFunctionReturn(0);
2048 }
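
/*
   The algorithm for the numeric phase is selected from the options database
   read above; e.g. (illustrative command line)

     ./myapp -matmattransmult_mpidense_mpidense_via cyclic

   chooses the cyclic variant, with "allgatherv" being the default.
*/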
2049 
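/*
   Cyclic variant: the transposed local rows of B are passed around the ring
   of processes with MPI_Isend()/MPI_Irecv(); at each of the size steps every
   process multiplies its local part of A against the block of B it currently
   holds, filling one column block of its local rows of C with a single BLAS
   GEMM. Only two block-sized buffers are needed instead of a full copy of B.
*/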
2050 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2051 {
2052   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2053   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2054   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2055   Mat_MatTransMultDense *abt = c->abtdense;
2056   PetscErrorCode ierr;
2057   MPI_Comm       comm;
2058   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2059   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2060   PetscInt       i,cK=A->cmap->N,k,j,bn;
2061   PetscScalar    _DOne=1.0,_DZero=0.0;
2062   PetscBLASInt   cm, cn, ck;
2063   MPI_Request    reqs[2];
2064   const PetscInt *ranges;
2065 
2066   PetscFunctionBegin;
2067   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2068   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2070 
2071   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2072   bn = B->rmap->n;
2073   if (bseq->lda == bn) {
2074     sendbuf = bseq->v;
2075   } else {
2076     sendbuf = abt->buf[0];
2077     for (k = 0, i = 0; i < cK; i++) {
2078       for (j = 0; j < bn; j++, k++) {
2079         sendbuf[k] = bseq->v[i * bseq->lda + j];
2080       }
2081     }
2082   }
2083   if (size > 1) {
2084     sendto = (rank + size - 1) % size;
2085     recvfrom = (rank + size + 1) % size;
2086   } else {
2087     sendto = recvfrom = 0;
2088   }
2089   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2090   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2091   recvisfrom = rank;
2092   for (i = 0; i < size; i++) {
2093     /* we have finished receiving and sending, so the buffers can be read/modified */
2094     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
2095     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2096 
2097     if (nextrecvisfrom != rank) {
2098       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2099       sendsiz = cK * bn;
2100       recvsiz = cK * nextbn;
2101       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2102       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2103       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2104     }
2105 
2106     /* local aseq * sendbuf^T */
2107     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2108     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2109     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2110 
2111     if (nextrecvisfrom != rank) {
2112       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2113       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2114     }
2115     bn = nextbn;
2116     recvisfrom = nextrecvisfrom;
2117     sendbuf = recvbuf;
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
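/*
   Allgatherv variant: each process transposes its local rows of B into
   buf[0], MPI_Allgatherv() assembles the full B^T in buf[1] on every process,
   and the local rows of C are then formed with a single BLAS GEMM.
*/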
2122 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2123 {
2124   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2125   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2126   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2127   Mat_MatTransMultDense *abt = c->abtdense;
2128   PetscErrorCode ierr;
2129   MPI_Comm       comm;
2130   PetscMPIInt    rank,size;
2131   PetscScalar    *sendbuf, *recvbuf;
2132   PetscInt       i,cK=A->cmap->N,k,j,bn;
2133   PetscScalar    _DOne=1.0,_DZero=0.0;
2134   PetscBLASInt   cm, cn, ck;
2135 
2136   PetscFunctionBegin;
2137   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2138   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2139   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2140 
2141   /* copy transpose of B into buf[0] */
2142   bn      = B->rmap->n;
2143   sendbuf = abt->buf[0];
2144   recvbuf = abt->buf[1];
2145   for (k = 0, j = 0; j < bn; j++) {
2146     for (i = 0; i < cK; i++, k++) {
2147       sendbuf[k] = bseq->v[i * bseq->lda + j];
2148     }
2149   }
2150   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2151   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2152   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2153   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2154   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2159 {
2160   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2161   Mat_MatTransMultDense *abt = c->abtdense;
2162   PetscErrorCode ierr;
2163 
2164   PetscFunctionBegin;
2165   switch (abt->alg) {
2166   case 1:
2167     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2168     break;
2169   default:
2170     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2171     break;
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2177 {
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181   if (scall == MAT_INITIAL_MATRIX) {
2182     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2183   }
2184   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2189 {
2190   PetscErrorCode   ierr;
2191   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2192   Mat_MatMultDense *ab = a->abdense;
2193 
2194   PetscFunctionBegin;
2195   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2196   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2197   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2198 
2199   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2200   ierr = PetscFree(ab);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #if defined(PETSC_HAVE_ELEMENTAL)
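/*
   With Elemental available, C = A*B for two MPIDENSE matrices is computed by
   converting A and B to MATELEMENTAL, forming the product there, and
   converting the result back; the Elemental copies Ae, Be, Ce are cached in
   Mat_MatMultDense so the numeric phase can be repeated without reallocation.
*/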
2205 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2206 {
2207   PetscErrorCode   ierr;
2208   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2209   Mat_MatMultDense *ab=c->abdense;
2210 
2211   PetscFunctionBegin;
2212   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2213   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2214   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2215   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2220 {
2221   PetscErrorCode   ierr;
2222   Mat              Ae,Be,Ce;
2223   Mat_MPIDense     *c;
2224   Mat_MatMultDense *ab;
2225 
2226   PetscFunctionBegin;
2227   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2228     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2229   }
2230 
2231   /* convert A and B to Elemental matrices Ae and Be */
2232   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2233   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2234 
2235   /* Ce = Ae*Be */
2236   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2237   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2238 
2239   /* convert Ce to C */
2240   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2241 
2242   /* create data structure for reuse of C */
2243   ierr = PetscNew(&ab);CHKERRQ(ierr);
2244   c                  = (Mat_MPIDense*)(*C)->data;
2245   c->abdense         = ab;
2246 
2247   ab->Ae             = Ae;
2248   ab->Be             = Be;
2249   ab->Ce             = Ce;
2250   ab->destroy        = (*C)->ops->destroy;
2251   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2252   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2257 {
2258   PetscErrorCode ierr;
2259 
2260   PetscFunctionBegin;
2261   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2262     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2263     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2264     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2265   } else {
2266     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2267     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2268     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2269   }
2270   PetscFunctionReturn(0);
2271 }
2272 #endif
2273 
2274