xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision a685ae26bbf9fb229cf02e15e73d16b8a9cd69e6)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14               matrix that stores the locally owned rows of the operator; for a sequential dense matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the MATSEQDENSE or MATMPIDENSE matrix
18 
19     Output Parameter:
20 .      B - the inner (local) sequential matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
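
/* A minimal usage sketch (not part of the library source; error handling abbreviated):
   access the raw storage of the locally owned rows of an assembled MATMPIDENSE matrix P
   through its inner sequential matrix.

      Mat         Ploc;
      PetscScalar *array;

      ierr = MatDenseGetLocalMatrix(P,&Ploc);CHKERRQ(ierr);
      ierr = MatDenseGetArray(Ploc,&array);CHKERRQ(ierr);
      ...  operate on the column-major local array  ...
      ierr = MatDenseRestoreArray(Ploc,&array);CHKERRQ(ierr);

   Ploc is a reference owned by P and must not be destroyed by the caller. */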
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62 {
63   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
66   PetscScalar    *array;
67   MPI_Comm       comm;
68   Mat            B;
69 
70   PetscFunctionBegin;
71   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72 
73   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
74   if (!B) {
75     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
76     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
77     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
78     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
79     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
80     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
81     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
82     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
84     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
85     *a   = B;
86     ierr = MatDestroy(&B);CHKERRQ(ierr);
87   } else *a = B;
88   PetscFunctionReturn(0);
89 }
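
/* Usage sketch (hedged; not part of this file): the block is normally obtained through the
   generic interface, which dispatches to the routine above for MATMPIDENSE matrices.

      Mat D;
      ierr = MatGetDiagonalBlock(A,&D);CHKERRQ(ierr);

   D aliases the local dense storage starting at column rstart, is cached on A under the
   key "DiagonalBlock", must not be destroyed by the caller, and is only available here for
   square matrices. */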
90 
91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92 {
93   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96   PetscBool      roworiented = A->roworiented;
97 
98   PetscFunctionBegin;
99   for (i=0; i<m; i++) {
100     if (idxm[i] < 0) continue;
101     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102     if (idxm[i] >= rstart && idxm[i] < rend) {
103       row = idxm[i] - rstart;
104       if (roworiented) {
105         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
106       } else {
107         for (j=0; j<n; j++) {
108           if (idxn[j] < 0) continue;
109           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
111         }
112       }
113     } else if (!A->donotstash) {
114       mat->assembled = PETSC_FALSE;
115       if (roworiented) {
116         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117       } else {
118         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
119       }
120     }
121   }
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188 {
189   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
190   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
191   PetscErrorCode ierr;
192   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
193   const PetscInt *irow,*icol;
194   PetscScalar    *av,*bv,*v = lmat->v;
195   Mat            newmat;
196   IS             iscol_local;
197   MPI_Comm       comm_is,comm_mat;
198 
199   PetscFunctionBegin;
200   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
201   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
202   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
203 
204   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
205   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
206   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
207   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
208   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
209   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
210 
211   /* No parallel redistribution is currently supported! We should really check each index set
212      to confirm that it is OK.  ... Currently this supports only a submatrix with the same row
213      partitioning as the original matrix! */
214 
215   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
216   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
217 
218   /* Check submatrix call */
219   if (scall == MAT_REUSE_MATRIX) {
220     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
221     /* We really need to check the row and column sizes of the reused matrix! */
222     newmat = *B;
223   } else {
224     /* Create and fill new matrix */
225     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
226     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
227     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
228     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
229   }
230 
231   /* Now extract the data pointers and do the copy, column at a time */
232   newmatd = (Mat_MPIDense*)newmat->data;
233   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
234 
235   for (i=0; i<Ncols; i++) {
236     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
237     for (j=0; j<nrows; j++) {
238       *bv++ = av[irow[j] - rstart];
239     }
240   }
241 
242   /* Assemble the matrices so that the correct flags are set */
243   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
245 
246   /* Free work space */
247   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
248   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
249   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
250   *B   = newmat;
251   PetscFunctionReturn(0);
252 }
253 
254 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
255 {
256   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
257   PetscErrorCode ierr;
258 
259   PetscFunctionBegin;
260   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
275 {
276   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
277   MPI_Comm       comm;
278   PetscErrorCode ierr;
279   PetscInt       nstash,reallocs;
280   InsertMode     addv;
281 
282   PetscFunctionBegin;
283   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
284   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
285   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
286   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
287   mat->insertmode = addv; /* in case this processor had no cache */
288 
289   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
290   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
291   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
296 {
297   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
298   PetscErrorCode ierr;
299   PetscInt       i,*row,*col,flg,j,rstart,ncols;
300   PetscMPIInt    n;
301   PetscScalar    *val;
302 
303   PetscFunctionBegin;
304   /*  wait on receives */
305   while (1) {
306     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
307     if (!flg) break;
308 
309     for (i=0; i<n;) {
310       /* Now identify the consecutive vals belonging to the same row */
311       for (j=i,rstart=row[j]; j<n; j++) {
312         if (row[j] != rstart) break;
313       }
314       if (j < n) ncols = j-i;
315       else       ncols = n-i;
316       /* Now assemble all these values with a single function call */
317       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
318       i    = j;
319     }
320   }
321   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
322 
323   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
324   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
325 
326   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
327     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
328   }
329   PetscFunctionReturn(0);
330 }
331 
332 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333 {
334   PetscErrorCode ierr;
335   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
336 
337   PetscFunctionBegin;
338   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
342 /* The code does not handle the diagonal entries correctly unless the
343    matrix is square and the column and row ownerships are identical.
344    This is a BUG. The only way to fix it seems to be to access
345    mdn->A and mdn->B directly and not through the MatZeroRows()
346    routine.
347 */
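/* Communication pattern: each process determines the owner of every global row index it was
   given, the per-owner counts are combined with PetscMaxSum() to size the receives, the
   indices are exchanged with nonblocking MPI sends/receives, and each process then zeroes
   the rows it owns (optionally fixing the right-hand side and reinserting the diagonal). */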
348 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
349 {
350   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
351   PetscErrorCode    ierr;
352   PetscInt          i,*owners = A->rmap->range;
353   PetscInt          *sizes,j,idx,nsends;
354   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
355   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
356   PetscInt          *lens,*lrows,*values;
357   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
358   MPI_Comm          comm;
359   MPI_Request       *send_waits,*recv_waits;
360   MPI_Status        recv_status,*send_status;
361   PetscBool         found;
362   const PetscScalar *xx;
363   PetscScalar       *bb;
364 
365   PetscFunctionBegin;
366   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
367   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
368   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
369   /*  first count number of contributors to each processor */
370   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
371   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
372   for (i=0; i<N; i++) {
373     idx   = rows[i];
374     found = PETSC_FALSE;
375     for (j=0; j<size; j++) {
376       if (idx >= owners[j] && idx < owners[j+1]) {
377         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
378       }
379     }
380     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
381   }
382   nsends = 0;
383   for (i=0; i<size; i++) nsends += sizes[2*i+1];
384 
385   /* inform other processors of number of messages and max length*/
386   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
387 
388   /* post receives:   */
389   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
390   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
391   for (i=0; i<nrecvs; i++) {
392     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
393   }
394 
395   /* do sends:
396       1) starts[i] gives the starting index in svalues for the entries destined for
397          the ith process
398   */
399   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
400   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
401   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
402 
403   starts[0] = 0;
404   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
405   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
406 
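  /* packing svalues advanced starts[], so rebuild the prefix-sum offsets (length size+1)
     before posting the sends */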
407   starts[0] = 0;
408   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
409   count = 0;
410   for (i=0; i<size; i++) {
411     if (sizes[2*i+1]) {
412       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
413     }
414   }
415   ierr = PetscFree(starts);CHKERRQ(ierr);
416 
417   base = owners[rank];
418 
419   /*  wait on receives */
420   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
421   count = nrecvs;
422   slen  = 0;
423   while (count) {
424     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
425     /* unpack receives into our local space */
426     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
427 
428     source[imdex] = recv_status.MPI_SOURCE;
429     lens[imdex]   = n;
430     slen += n;
431     count--;
432   }
433   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
434 
435   /* move the data into the send scatter */
436   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
437   count = 0;
438   for (i=0; i<nrecvs; i++) {
439     values = rvalues + i*nmax;
440     for (j=0; j<lens[i]; j++) {
441       lrows[count++] = values[j] - base;
442     }
443   }
444   ierr = PetscFree(rvalues);CHKERRQ(ierr);
445   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
446   ierr = PetscFree(owner);CHKERRQ(ierr);
447   ierr = PetscFree(sizes);CHKERRQ(ierr);
448 
449   /* fix right hand side if needed */
450   if (x && b) {
451     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
452     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
453     for (i=0; i<slen; i++) {
454       bb[lrows[i]] = diag*xx[lrows[i]];
455     }
456     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
457     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
458   }
459 
460   /* actually zap the local rows */
461   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
462   if (diag != 0.0) {
463     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
464     PetscInt     m   = ll->lda, i;
465 
466     for (i=0; i<slen; i++) {
467       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
468     }
469   }
470   ierr = PetscFree(lrows);CHKERRQ(ierr);
471 
472   /* wait on sends */
473   if (nsends) {
474     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
475     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
476     ierr = PetscFree(send_status);CHKERRQ(ierr);
477   }
478   ierr = PetscFree(send_waits);CHKERRQ(ierr);
479   ierr = PetscFree(svalues);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
484 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
485 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
486 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
487 
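/* The parallel products below all follow the same pattern: gather the full input vector into
   the local work vector lvec with the Mvctx scatter (or scatter-add the local result back for
   the transpose variants), and apply the (local rows) x (global columns) sequential dense
   block stored in mdn->A. */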
488 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
489 {
490   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
491   PetscErrorCode ierr;
492 
493   PetscFunctionBegin;
494   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
495   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
501 {
502   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
503   PetscErrorCode ierr;
504 
505   PetscFunctionBegin;
506   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
507   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
508   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
513 {
514   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
515   PetscErrorCode ierr;
516   PetscScalar    zero = 0.0;
517 
518   PetscFunctionBegin;
519   ierr = VecSet(yy,zero);CHKERRQ(ierr);
520   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
521   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
522   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
527 {
528   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
529   PetscErrorCode ierr;
530 
531   PetscFunctionBegin;
532   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
533   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
534   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
535   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
542   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
543   PetscErrorCode ierr;
544   PetscInt       len,i,n,m = A->rmap->n,radd;
545   PetscScalar    *x,zero = 0.0;
546 
547   PetscFunctionBegin;
548   ierr = VecSet(v,zero);CHKERRQ(ierr);
549   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
550   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
551   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
553   radd = A->rmap->rstart*m;
554   for (i=0; i<len; i++) {
555     x[i] = aloc->v[radd + i*m + i];
556   }
557   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 PetscErrorCode MatDestroy_MPIDense(Mat mat)
562 {
563   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
564   PetscErrorCode ierr;
565 
566   PetscFunctionBegin;
567 #if defined(PETSC_USE_LOG)
568   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
569 #endif
570   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
571   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
572   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
573   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
574 
575   ierr = PetscFree(mat->data);CHKERRQ(ierr);
576   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
577 
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
580   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
581   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
582   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
584 #if defined(PETSC_HAVE_ELEMENTAL)
585   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
586 #endif
587   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
592   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
593   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
594   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
595   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
600 {
601   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
602   PetscErrorCode    ierr;
603   PetscViewerFormat format;
604   int               fd;
605   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
606   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
607   PetscScalar       *work,*v,*vv;
608   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
609 
610   PetscFunctionBegin;
611   if (mdn->size == 1) {
612     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
613   } else {
614     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
615     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
616     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
617 
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_NATIVE) {
620 
621       if (!rank) {
622         /* store the matrix as a dense matrix */
623         header[0] = MAT_FILE_CLASSID;
624         header[1] = mat->rmap->N;
625         header[2] = N;
626         header[3] = MATRIX_BINARY_FORMAT_DENSE;
627         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
628 
629         /* compute the size of the largest work array needed for transposing the local array */
630         mmax = mat->rmap->n;
631         for (i=1; i<size; i++) {
632           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
633         }
634         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
635 
636         /* write out local array, by rows */
637         m = mat->rmap->n;
638         v = a->v;
639         for (j=0; j<N; j++) {
640           for (i=0; i<m; i++) {
641             work[j + i*N] = *v++;
642           }
643         }
644         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
645         /* allocate the largest buffer needed to receive messages from the other processes (process zero excluded) */
646         mmax = 0;
647         for (i=1; i<size; i++) {
648           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
649         }
650         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
651         for (k = 1; k < size; k++) {
652           v    = vv;
653           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
654           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
655 
656           for (j = 0; j < N; j++) {
657             for (i = 0; i < m; i++) {
658               work[j + i*N] = *v++;
659             }
660           }
661           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
662         }
663         ierr = PetscFree(work);CHKERRQ(ierr);
664         ierr = PetscFree(vv);CHKERRQ(ierr);
665       } else {
666         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
667       }
668     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
669   }
670   PetscFunctionReturn(0);
671 }
672 
673 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
674 #include <petscdraw.h>
675 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
676 {
677   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
678   PetscErrorCode    ierr;
679   PetscMPIInt       rank = mdn->rank;
680   PetscViewerType   vtype;
681   PetscBool         iascii,isdraw;
682   PetscViewer       sviewer;
683   PetscViewerFormat format;
684 
685   PetscFunctionBegin;
686   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
688   if (iascii) {
689     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
690     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
691     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
692       MatInfo info;
693       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
694       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
695       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
697       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
698       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_INFO) {
701       PetscFunctionReturn(0);
702     }
703   } else if (isdraw) {
704     PetscDraw draw;
705     PetscBool isnull;
706 
707     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
708     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
709     if (isnull) PetscFunctionReturn(0);
710   }
711 
712   {
713     /* assemble the entire matrix onto first processor. */
714     Mat         A;
715     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
716     PetscInt    *cols;
717     PetscScalar *vals;
718 
719     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
722     } else {
723       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
724     }
725     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)mat)->type_name here is probably acceptable. */
726     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
727     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
728     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
729 
730     /* Copy the matrix ... This isn't the most efficient means,
731        but it's quick for now */
732     A->insertmode = INSERT_VALUES;
733 
734     row = mat->rmap->rstart;
735     m   = mdn->A->rmap->n;
736     for (i=0; i<m; i++) {
737       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
738       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
739       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
740       row++;
741     }
742 
743     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
744     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
746     if (!rank) {
747       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
748       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
749     }
750     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
751     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
752     ierr = MatDestroy(&A);CHKERRQ(ierr);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
758 {
759   PetscErrorCode ierr;
760   PetscBool      iascii,isbinary,isdraw,issocket;
761 
762   PetscFunctionBegin;
763   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
764   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
765   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
766   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
767 
768   if (iascii || issocket || isdraw) {
769     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
770   } else if (isbinary) {
771     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
777 {
778   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
779   Mat            mdn  = mat->A;
780   PetscErrorCode ierr;
781   PetscReal      isend[5],irecv[5];
782 
783   PetscFunctionBegin;
784   info->block_size = 1.0;
785 
786   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
787 
788   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
789   isend[3] = info->memory;  isend[4] = info->mallocs;
790   if (flag == MAT_LOCAL) {
791     info->nz_used      = isend[0];
792     info->nz_allocated = isend[1];
793     info->nz_unneeded  = isend[2];
794     info->memory       = isend[3];
795     info->mallocs      = isend[4];
796   } else if (flag == MAT_GLOBAL_MAX) {
797     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
798 
799     info->nz_used      = irecv[0];
800     info->nz_allocated = irecv[1];
801     info->nz_unneeded  = irecv[2];
802     info->memory       = irecv[3];
803     info->mallocs      = irecv[4];
804   } else if (flag == MAT_GLOBAL_SUM) {
805     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
806 
807     info->nz_used      = irecv[0];
808     info->nz_allocated = irecv[1];
809     info->nz_unneeded  = irecv[2];
810     info->memory       = irecv[3];
811     info->mallocs      = irecv[4];
812   }
813   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
814   info->fill_ratio_needed = 0;
815   info->factor_mallocs    = 0;
816   PetscFunctionReturn(0);
817 }
818 
819 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
820 {
821   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   switch (op) {
826   case MAT_NEW_NONZERO_LOCATIONS:
827   case MAT_NEW_NONZERO_LOCATION_ERR:
828   case MAT_NEW_NONZERO_ALLOCATION_ERR:
829     MatCheckPreallocated(A,1);
830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
831     break;
832   case MAT_ROW_ORIENTED:
833     MatCheckPreallocated(A,1);
834     a->roworiented = flg;
835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
836     break;
837   case MAT_NEW_DIAGONALS:
838   case MAT_KEEP_NONZERO_PATTERN:
839   case MAT_USE_HASH_TABLE:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   case MAT_IGNORE_OFF_PROC_ENTRIES:
843     a->donotstash = flg;
844     break;
845   case MAT_SYMMETRIC:
846   case MAT_STRUCTURALLY_SYMMETRIC:
847   case MAT_HERMITIAN:
848   case MAT_SYMMETRY_ETERNAL:
849   case MAT_IGNORE_LOWER_TRIANGULAR:
850     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
851     break;
852   default:
853     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 
859 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860 {
861   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
862   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
863   const PetscScalar *l,*r;
864   PetscScalar       x,*v;
865   PetscErrorCode    ierr;
866   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
867 
868   PetscFunctionBegin;
869   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
870   if (ll) {
871     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
872     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
873     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
874     for (i=0; i<m; i++) {
875       x = l[i];
876       v = mat->v + i;
877       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
878     }
879     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
880     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
881   }
882   if (rr) {
883     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
884     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
885     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
888     for (i=0; i<n; i++) {
889       x = r[i];
890       v = mat->v + i*m;
891       for (j=0; j<m; j++) (*v++) *= x;
892     }
893     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
894     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
895   }
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
900 {
901   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
902   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
903   PetscErrorCode ierr;
904   PetscInt       i,j;
905   PetscReal      sum = 0.0;
906   PetscScalar    *v  = mat->v;
907 
908   PetscFunctionBegin;
909   if (mdn->size == 1) {
910     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
911   } else {
912     if (type == NORM_FROBENIUS) {
913       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
915       }
916       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
917       *nrm = PetscSqrtReal(*nrm);
918       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
919     } else if (type == NORM_1) {
920       PetscReal *tmp,*tmp2;
921       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
922       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
923       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
924       *nrm = 0.0;
925       v    = mat->v;
926       for (j=0; j<mdn->A->cmap->n; j++) {
927         for (i=0; i<mdn->A->rmap->n; i++) {
928           tmp[j] += PetscAbsScalar(*v);  v++;
929         }
930       }
931       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
932       for (j=0; j<A->cmap->N; j++) {
933         if (tmp2[j] > *nrm) *nrm = tmp2[j];
934       }
935       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
936       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
937     } else if (type == NORM_INFINITY) { /* max row norm */
938       PetscReal ntemp;
939       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
940       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
947 {
948   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
949   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
950   Mat            B;
951   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
952   PetscErrorCode ierr;
953   PetscInt       j,i;
954   PetscScalar    *v;
955 
956   PetscFunctionBegin;
957   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
958     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
959     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
960     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
961     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
962   } else {
963     B = *matout;
964   }
965 
966   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
967   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
968   for (i=0; i<m; i++) rwork[i] = rstart + i;
969   for (j=0; j<n; j++) {
970     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
971     v   += m;
972   }
973   ierr = PetscFree(rwork);CHKERRQ(ierr);
974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
977     *matout = B;
978   } else {
979     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 
985 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
986 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
987 
988 PetscErrorCode MatSetUp_MPIDense(Mat A)
989 {
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998 {
999   PetscErrorCode ierr;
1000   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1001 
1002   PetscFunctionBegin;
1003   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1004   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1009 {
1010   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 PetscErrorCode MatRealPart_MPIDense(Mat A)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1029 {
1030   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1039 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1040 {
1041   PetscErrorCode ierr;
1042   PetscInt       i,n;
1043   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1044   PetscReal      *work;
1045 
1046   PetscFunctionBegin;
1047   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1049   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1050   if (type == NORM_2) {
1051     for (i=0; i<n; i++) work[i] *= work[i];
1052   }
1053   if (type == NORM_INFINITY) {
1054     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1055   } else {
1056     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1057   }
1058   ierr = PetscFree(work);CHKERRQ(ierr);
1059   if (type == NORM_2) {
1060     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1066 {
1067   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1068   PetscErrorCode ierr;
1069   PetscScalar    *a;
1070   PetscInt       m,n,i;
1071 
1072   PetscFunctionBegin;
1073   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1074   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1075   for (i=0; i<m*n; i++) {
1076     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1077   }
1078   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1083 
1084 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1085 {
1086   PetscFunctionBegin;
1087   *missing = PETSC_FALSE;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1092 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1093 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1094 
1095 /* -------------------------------------------------------------------*/
1096 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1097                                         MatGetRow_MPIDense,
1098                                         MatRestoreRow_MPIDense,
1099                                         MatMult_MPIDense,
1100                                 /*  4*/ MatMultAdd_MPIDense,
1101                                         MatMultTranspose_MPIDense,
1102                                         MatMultTransposeAdd_MPIDense,
1103                                         0,
1104                                         0,
1105                                         0,
1106                                 /* 10*/ 0,
1107                                         0,
1108                                         0,
1109                                         0,
1110                                         MatTranspose_MPIDense,
1111                                 /* 15*/ MatGetInfo_MPIDense,
1112                                         MatEqual_MPIDense,
1113                                         MatGetDiagonal_MPIDense,
1114                                         MatDiagonalScale_MPIDense,
1115                                         MatNorm_MPIDense,
1116                                 /* 20*/ MatAssemblyBegin_MPIDense,
1117                                         MatAssemblyEnd_MPIDense,
1118                                         MatSetOption_MPIDense,
1119                                         MatZeroEntries_MPIDense,
1120                                 /* 24*/ MatZeroRows_MPIDense,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                         0,
1125                                 /* 29*/ MatSetUp_MPIDense,
1126                                         0,
1127                                         0,
1128                                         MatGetDiagonalBlock_MPIDense,
1129                                         0,
1130                                 /* 34*/ MatDuplicate_MPIDense,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                         0,
1135                                 /* 39*/ MatAXPY_MPIDense,
1136                                         MatCreateSubMatrices_MPIDense,
1137                                         0,
1138                                         MatGetValues_MPIDense,
1139                                         0,
1140                                 /* 44*/ 0,
1141                                         MatScale_MPIDense,
1142                                         MatShift_Basic,
1143                                         0,
1144                                         0,
1145                                 /* 49*/ MatSetRandom_MPIDense,
1146                                         0,
1147                                         0,
1148                                         0,
1149                                         0,
1150                                 /* 54*/ 0,
1151                                         0,
1152                                         0,
1153                                         0,
1154                                         0,
1155                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1156                                         MatDestroy_MPIDense,
1157                                         MatView_MPIDense,
1158                                         0,
1159                                         0,
1160                                 /* 64*/ 0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                 /* 69*/ 0,
1166                                         0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                 /* 74*/ 0,
1171                                         0,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                 /* 79*/ 0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                 /* 83*/ MatLoad_MPIDense,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185 #if defined(PETSC_HAVE_ELEMENTAL)
1186                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1187                                         MatMatMultSymbolic_MPIDense_MPIDense,
1188 #else
1189                                 /* 89*/ 0,
1190                                         0,
1191 #endif
1192                                         MatMatMultNumeric_MPIDense,
1193                                         0,
1194                                         0,
1195                                 /* 94*/ 0,
1196                                         MatMatTransposeMult_MPIDense_MPIDense,
1197                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1198                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1199                                         0,
1200                                 /* 99*/ 0,
1201                                         0,
1202                                         0,
1203                                         MatConjugate_MPIDense,
1204                                         0,
1205                                 /*104*/ 0,
1206                                         MatRealPart_MPIDense,
1207                                         MatImaginaryPart_MPIDense,
1208                                         0,
1209                                         0,
1210                                 /*109*/ 0,
1211                                         0,
1212                                         0,
1213                                         0,
1214                                         MatMissingDiagonal_MPIDense,
1215                                 /*114*/ 0,
1216                                         0,
1217                                         0,
1218                                         0,
1219                                         0,
1220                                 /*119*/ 0,
1221                                         0,
1222                                         0,
1223                                         0,
1224                                         0,
1225                                 /*124*/ 0,
1226                                         MatGetColumnNorms_MPIDense,
1227                                         0,
1228                                         0,
1229                                         0,
1230                                 /*129*/ 0,
1231                                         MatTransposeMatMult_MPIDense_MPIDense,
1232                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1233                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1234                                         0,
1235                                 /*134*/ 0,
1236                                         0,
1237                                         0,
1238                                         0,
1239                                         0,
1240                                 /*139*/ 0,
1241                                         0,
1242                                         0,
1243                                         0,
1244                                         0,
1245                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1246 };
1247 
1248 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1249 {
1250   Mat_MPIDense   *a;
1251   PetscErrorCode ierr;
1252 
1253   PetscFunctionBegin;
1254   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1255   mat->preallocated = PETSC_TRUE;
1256   /* Note: for now, when data is provided, this assumes the user has correctly
1257      allocated the local dense storage space.  We should add error checking. */
1258 
1259   a       = (Mat_MPIDense*)mat->data;
1260   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1261   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1262   a->nvec = mat->cmap->n;
1263 
1264   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1265   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1266   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1267   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1268   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1269   PetscFunctionReturn(0);
1270 }
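
/* Usage sketch (hedged; comm, m and N are illustrative): the usual way to reach the routine
   above and have PETSc allocate the local block.

      Mat C;
      ierr = MatCreate(comm,&C);CHKERRQ(ierr);
      ierr = MatSetSizes(C,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
      ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);

   Passing NULL lets the inner MATSEQDENSE allocate its own column-major storage of size
   (local rows) x (global columns); passing a user-provided array of that size uses it
   directly, as noted above. */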
1271 
1272 #if defined(PETSC_HAVE_ELEMENTAL)
1273 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1274 {
1275   Mat            mat_elemental;
1276   PetscErrorCode ierr;
1277   PetscScalar    *v;
1278   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1279 
1280   PetscFunctionBegin;
1281   if (reuse == MAT_REUSE_MATRIX) {
1282     mat_elemental = *newmat;
1283     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1284   } else {
1285     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1286     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1287     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1288     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1289     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1290   }
1291 
1292   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1293   for (i=0; i<N; i++) cols[i] = i;
1294   for (i=0; i<m; i++) rows[i] = rstart + i;
1295 
1296   /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
1297   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1298   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1299   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1300   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1301   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1302   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1303 
1304   if (reuse == MAT_INPLACE_MATRIX) {
1305     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1306   } else {
1307     *newmat = mat_elemental;
1308   }
1309   PetscFunctionReturn(0);
1310 }
1311 #endif
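
/* Conversion sketch (hedged; requires a build with PETSC_HAVE_ELEMENTAL): the routine above
   is normally reached through the generic conversion interface.

      Mat Ae;
      ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);CHKERRQ(ierr);

   With MAT_INPLACE_MATRIX the header of A is replaced, so A itself becomes the Elemental
   matrix. */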
1312 
1313 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1314 {
1315   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1324 {
1325   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1326   PetscErrorCode ierr;
1327 
1328   PetscFunctionBegin;
1329   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1334 {
1335   PetscErrorCode ierr;
1336   Mat_MPIDense   *mat;
1337   PetscInt       m,nloc,N;
1338 
1339   PetscFunctionBegin;
1340   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1341   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1342   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1343     PetscInt sum;
1344 
1345     if (n == PETSC_DECIDE) {
1346       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1347     }
1348     /* Check sum(n) = N */
1349     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1350     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1351 
1352     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1353   }
1354 
1355   /* numeric phase */
1356   mat = (Mat_MPIDense*)(*outmat)->data;
1357   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1358   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1359   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
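
/*
   Illustrative sketch (not part of the original implementation): the routine above is normally
   reached through MatCreateMPIMatConcatenateSeqMat(), which stacks the sequential dense matrix
   owned by each process into a single parallel dense matrix. The name seqA is hypothetical.
*/
static PetscErrorCode ExampleConcatenateSeqDense(MPI_Comm comm,Mat seqA,Mat *mpiA)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PETSC_DECIDE lets PETSc choose the local column layout of the result */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,mpiA);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}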
1362 
1363 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1364 {
1365   Mat_MPIDense   *a;
1366   PetscErrorCode ierr;
1367 
1368   PetscFunctionBegin;
1369   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1370   mat->data = (void*)a;
1371   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1372 
1373   mat->insertmode = NOT_SET_VALUES;
1374   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1375   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1376 
1377   /* build cache for off-processor entries formed during assembly */
1378   a->donotstash = PETSC_FALSE;
1379 
1380   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1381 
1382   /* stuff used for matrix vector multiply */
1383   a->lvec        = 0;
1384   a->Mvctx       = 0;
1385   a->roworiented = PETSC_TRUE;
1386 
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1388   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1390   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1391   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1393 #if defined(PETSC_HAVE_ELEMENTAL)
1394   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1395 #endif
1396   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1397   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1398   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1399   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1400 
1401   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1402   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1403   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1404   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1405   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1406   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /*MC
1411    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1412 
1413    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1414    and MATMPIDENSE otherwise.
1415 
1416    Options Database Keys:
1417 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1418 
1419   Level: beginner
1420 
1421 
1422 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1423 M*/
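
/*
   Illustrative sketch (not part of the original implementation): selecting the "dense" type,
   which resolves to MATSEQDENSE on one process and MATMPIDENSE otherwise; -mat_type dense has
   the same effect through MatSetFromOptions(). The names m and n are hypothetical local sizes.
*/
static PetscErrorCode ExampleCreateDenseFromOptions(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);  /* allows -mat_type to override the type */
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}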
1424 
1425 /*@C
1426    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1427 
1428    Not Collective
1429 
1430    Input Parameters:
1431 +  B - the matrix
1432 -  data - optional location of matrix data.  Set data=NULL for PETSc
1433    to control all matrix memory allocation.
1434 
1435    Notes:
1436    The dense format is fully compatible with standard Fortran 77
1437    storage by columns.
1438 
1439    The data input variable is intended primarily for Fortran programmers
1440    who wish to allocate their own matrix memory space.  Most users should
1441    set data=NULL.
1442 
1443    Level: intermediate
1444 
1445 .keywords: matrix,dense, parallel
1446 
1447 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1448 @*/
1449 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1450 {
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
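
/*
   Illustrative sketch (not part of the original implementation): typical preallocation of a
   MATMPIDENSE matrix with PETSc-managed storage (data = NULL). The names m and N are
   hypothetical local row and global column counts.
*/
static PetscErrorCode ExamplePreallocateMPIDense(MPI_Comm comm,PetscInt m,PetscInt N,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*B,NULL);CHKERRQ(ierr);  /* NULL: PETSc allocates the local array */
  PetscFunctionReturn(0);
}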
1457 
1458 /*@
1459    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1460    array provided by the user. This is useful to avoid copying an array
1461    into a matrix.
1462 
1463    Not Collective
1464 
1465    Input Parameters:
1466 +  mat - the matrix
1467 -  array - the array in column major order
1468 
1469    Notes:
1470    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1471    freed when the matrix is destroyed.
1472 
1473    Level: developer
1474 
1475 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1476 
1477 @*/
1478 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1479 {
1480   PetscErrorCode ierr;
1481   PetscFunctionBegin;
1482   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1483   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 /*@
1488    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1489 
1490    Not Collective
1491 
1492    Input Parameter:
1493 .  mat - the matrix
1494 
1495    Notes:
1496    You can only call this after a call to MatDensePlaceArray()
1497 
1498    Level: developer
1499 
1500 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1501 
1502 @*/
1503 PetscErrorCode  MatDenseResetArray(Mat mat)
1504 {
1505   PetscErrorCode ierr;
1506   PetscFunctionBegin;
1507   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1508   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
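
/*
   Illustrative sketch (not part of the original implementation): pairing MatDensePlaceArray()
   with MatDenseResetArray() to operate on a user-owned, column-major buffer without copying.
   The names userbuf, x and y are hypothetical; userbuf must hold at least
   (local rows) x (global columns) scalars and remains owned by the caller.
*/
static PetscErrorCode ExamplePlaceAndResetArray(Mat A,const PetscScalar *userbuf,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(A,userbuf);CHKERRQ(ierr);  /* A now operates on userbuf */
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = MatDenseResetArray(A);CHKERRQ(ierr);          /* restore the array A had before */
  PetscFunctionReturn(0);
}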
1511 
1512 /*@C
1513    MatCreateDense - Creates a parallel matrix in dense format.
1514 
1515    Collective on MPI_Comm
1516 
1517    Input Parameters:
1518 +  comm - MPI communicator
1519 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1520 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1521 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1522 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1523 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1524    to control all matrix memory allocation.
1525 
1526    Output Parameter:
1527 .  A - the matrix
1528 
1529    Notes:
1530    The dense format is fully compatible with standard Fortran 77
1531    storage by columns.
1532 
1533    The data input variable is intended primarily for Fortran programmers
1534    who wish to allocate their own matrix memory space.  Most users should
1535    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1536 
1537    The user MUST specify either the local or global matrix dimensions
1538    (possibly both).
1539 
1540    Level: intermediate
1541 
1542 .keywords: matrix,dense, parallel
1543 
1544 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1545 @*/
1546 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1547 {
1548   PetscErrorCode ierr;
1549   PetscMPIInt    size;
1550 
1551   PetscFunctionBegin;
1552   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1553   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1554   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1555   if (size > 1) {
1556     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1557     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1558     if (data) {  /* user provided data array, so no need to assemble */
1559       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1560       (*A)->assembled = PETSC_TRUE;
1561     }
1562   } else {
1563     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1564     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1565   }
1566   PetscFunctionReturn(0);
1567 }
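
/*
   Illustrative sketch (not part of the original implementation): creating a parallel dense
   matrix whose local block aliases an application-owned, column-major array. The names m, N
   and mydata are hypothetical; passing NULL instead of mydata lets PETSc allocate the storage.
*/
static PetscErrorCode ExampleCreateDenseWithUserArray(MPI_Comm comm,PetscInt m,PetscInt N,PetscScalar *mydata,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* mydata is expected to hold m*N scalars stored by columns (Fortran order) */
  ierr = MatCreateDense(comm,m,PETSC_DECIDE,PETSC_DETERMINE,N,mydata,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}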
1568 
1569 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1570 {
1571   Mat            mat;
1572   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1573   PetscErrorCode ierr;
1574 
1575   PetscFunctionBegin;
1576   *newmat = 0;
1577   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1578   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1579   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1580   a       = (Mat_MPIDense*)mat->data;
1581 
1582   mat->factortype   = A->factortype;
1583   mat->assembled    = PETSC_TRUE;
1584   mat->preallocated = PETSC_TRUE;
1585 
1586   a->size         = oldmat->size;
1587   a->rank         = oldmat->rank;
1588   mat->insertmode = NOT_SET_VALUES;
1589   a->nvec         = oldmat->nvec;
1590   a->donotstash   = oldmat->donotstash;
1591 
1592   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1593   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1594 
1595   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1596   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1597   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1598 
1599   *newmat = mat;
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1604 {
1605   PetscErrorCode ierr;
1606   PetscMPIInt    rank,size;
1607   const PetscInt *rowners;
1608   PetscInt       i,m,n,nz,j,mMax;
1609   PetscScalar    *array,*vals,*vals_ptr;
1610   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1611 
1612   PetscFunctionBegin;
1613   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1614   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1615 
1616   /* determine ownership of rows and columns */
1617   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1618   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1619 
1620   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1621   if (!a->A) {
1622     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1623   }
1624   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1625   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1626   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1627   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1628   if (!rank) {
1629     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1630 
1631     /* read in my part of the matrix numerical values  */
1632     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1633 
1634     /* insert into matrix by row (this is why we cannot read directly into array) */
1635     vals_ptr = vals;
1636     for (i=0; i<m; i++) {
1637       for (j=0; j<N; j++) {
1638         array[i + j*m] = *vals_ptr++;
1639       }
1640     }
1641 
1642     /* read in the other processors' parts and ship them out */
1643     for (i=1; i<size; i++) {
1644       nz   = (rowners[i+1] - rowners[i])*N;
1645       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1646       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1647     }
1648   } else {
1649     /* receive numeric values */
1650     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1651 
1652     /* receive message of values*/
1653     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1654 
1655     /* insert into matrix by row (this is why we cannot read directly into array) */
1656     vals_ptr = vals;
1657     for (i=0; i<m; i++) {
1658       for (j=0; j<N; j++) {
1659         array[i + j*m] = *vals_ptr++;
1660       }
1661     }
1662   }
1663   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1664   ierr = PetscFree(vals);CHKERRQ(ierr);
1665   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1671 {
1672   Mat_MPIDense   *a;
1673   PetscScalar    *vals,*svals;
1674   MPI_Comm       comm;
1675   MPI_Status     status;
1676   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1677   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1678   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1679   PetscInt       i,nz,j,rstart,rend;
1680   int            fd;
1681   PetscBool      isbinary;
1682   PetscErrorCode ierr;
1683 
1684   PetscFunctionBegin;
1685   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1686   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1687 
1688   /* force binary viewer to load .info file if it has not yet done so */
1689   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1690   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1691   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1692   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1693   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1694   if (!rank) {
1695     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1696     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1697   }
1698   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1699   M    = header[1]; N = header[2]; nz = header[3];
1700 
1701   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1702   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1703   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1704 
1705   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1706   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1707 
1708   /*
1709        Handle case where matrix is stored on disk as a dense matrix
1710   */
1711   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1712     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1713     PetscFunctionReturn(0);
1714   }
1715 
1716   /* determine ownership of all rows */
1717   if (newmat->rmap->n < 0) {
1718     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1719   } else {
1720     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1721   }
1722   if (newmat->cmap->n < 0) {
1723     n = PETSC_DECIDE;
1724   } else {
1725     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1726   }
1727 
1728   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1729   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1730   rowners[0] = 0;
1731   for (i=2; i<=size; i++) {
1732     rowners[i] += rowners[i-1];
1733   }
1734   rstart = rowners[rank];
1735   rend   = rowners[rank+1];
1736 
1737   /* distribute row lengths to all processors */
1738   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1739   if (!rank) {
1740     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1741     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1742     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1743     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1744     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1745     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1746   } else {
1747     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1748   }
1749 
1750   if (!rank) {
1751     /* calculate the number of nonzeros on each processor */
1752     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1753     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1754     for (i=0; i<size; i++) {
1755       for (j=rowners[i]; j< rowners[i+1]; j++) {
1756         procsnz[i] += rowlengths[j];
1757       }
1758     }
1759     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1760 
1761     /* determine max buffer needed and allocate it */
1762     maxnz = 0;
1763     for (i=0; i<size; i++) {
1764       maxnz = PetscMax(maxnz,procsnz[i]);
1765     }
1766     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1767 
1768     /* read in my part of the matrix column indices  */
1769     nz   = procsnz[0];
1770     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1771     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1772 
1773     /* read in everyone else's part and ship it off */
1774     for (i=1; i<size; i++) {
1775       nz   = procsnz[i];
1776       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1777       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1778     }
1779     ierr = PetscFree(cols);CHKERRQ(ierr);
1780   } else {
1781     /* determine buffer space needed for message */
1782     nz = 0;
1783     for (i=0; i<m; i++) {
1784       nz += ourlens[i];
1785     }
1786     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1787 
1788     /* receive message of column indices*/
1789     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1790     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1791     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1792   }
1793 
1794   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1795   a = (Mat_MPIDense*)newmat->data;
1796   if (!a->A) {
1797     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1798   }
1799 
1800   if (!rank) {
1801     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1802 
1803     /* read in my part of the matrix numerical values  */
1804     nz   = procsnz[0];
1805     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1806 
1807     /* insert into matrix */
1808     jj      = rstart;
1809     smycols = mycols;
1810     svals   = vals;
1811     for (i=0; i<m; i++) {
1812       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1813       smycols += ourlens[i];
1814       svals   += ourlens[i];
1815       jj++;
1816     }
1817 
1818     /* read in the other processors' parts and ship them out */
1819     for (i=1; i<size; i++) {
1820       nz   = procsnz[i];
1821       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1822       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1823     }
1824     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1825   } else {
1826     /* receive numeric values */
1827     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1828 
1829     /* receive message of values*/
1830     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1831     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1832     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1833 
1834     /* insert into matrix */
1835     jj      = rstart;
1836     smycols = mycols;
1837     svals   = vals;
1838     for (i=0; i<m; i++) {
1839       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1840       smycols += ourlens[i];
1841       svals   += ourlens[i];
1842       jj++;
1843     }
1844   }
1845   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1846   ierr = PetscFree(vals);CHKERRQ(ierr);
1847   ierr = PetscFree(mycols);CHKERRQ(ierr);
1848   ierr = PetscFree(rowners);CHKERRQ(ierr);
1849 
1850   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1851   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
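
/*
   Illustrative sketch (not part of the original implementation): loading a parallel dense
   matrix from a PETSc binary file, which ends up in MatLoad_MPIDense() above. The file name
   "matrix.dat" is hypothetical.
*/
static PetscErrorCode ExampleLoadMPIDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}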
1854 
1855 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1856 {
1857   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1858   Mat            a,b;
1859   PetscBool      flg;
1860   PetscErrorCode ierr;
1861 
1862   PetscFunctionBegin;
1863   a    = matA->A;
1864   b    = matB->A;
1865   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1866   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1871 {
1872   PetscErrorCode        ierr;
1873   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1874   Mat_TransMatMultDense *atb = a->atbdense;
1875 
1876   PetscFunctionBegin;
1877   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1878   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1879   ierr = PetscFree(atb);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1884 {
1885   PetscErrorCode        ierr;
1886   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1887   Mat_MatTransMultDense *abt = a->abtdense;
1888 
1889   PetscFunctionBegin;
1890   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1891   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1892   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1893   ierr = PetscFree(abt);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1898 {
1899   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1900   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1901   Mat_TransMatMultDense *atb = c->atbdense;
1902   PetscErrorCode ierr;
1903   MPI_Comm       comm;
1904   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1905   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1906   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1907   PetscScalar    _DOne=1.0,_DZero=0.0;
1908   PetscBLASInt   am,an,bn,aN;
1909   const PetscInt *ranges;
1910 
1911   PetscFunctionBegin;
1912   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1913   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1914   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1915 
1916   /* compute atbarray = aseq^T * bseq */
1917   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1918   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1919   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1920   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1921   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1922 
1923   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1924   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1925 
1926   /* arrange atbarray into sendbuf */
1927   k = 0;
1928   for (proc=0; proc<size; proc++) {
1929     for (j=0; j<cN; j++) {
1930       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1931     }
1932   }
1933   /* sum all atbarray to local values of C */
1934   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1935   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1936   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1941 {
1942   PetscErrorCode        ierr;
1943   Mat                   Cdense;
1944   MPI_Comm              comm;
1945   PetscMPIInt           size;
1946   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1947   Mat_MPIDense          *c;
1948   Mat_TransMatMultDense *atb;
1949 
1950   PetscFunctionBegin;
1951   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1952   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1953     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1954   }
1955 
1956   /* create matrix product Cdense */
1957   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1958   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1959   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1960   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1961   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1962   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   *C   = Cdense;
1964 
1965   /* create data structure for reusing Cdense */
1966   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1967   ierr = PetscNew(&atb);CHKERRQ(ierr);
1968   cM = Cdense->rmap->N;
1969   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1970 
1971   c                    = (Mat_MPIDense*)Cdense->data;
1972   c->atbdense          = atb;
1973   atb->destroy         = Cdense->ops->destroy;
1974   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1979 {
1980   PetscErrorCode ierr;
1981 
1982   PetscFunctionBegin;
1983   if (scall == MAT_INITIAL_MATRIX) {
1984     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1985   }
1986   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1987   PetscFunctionReturn(0);
1988 }
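
/*
   Illustrative sketch (not part of the original implementation): forming C = A^T * B for two
   MATMPIDENSE matrices through the generic MatTransposeMatMult() interface, which dispatches
   to the symbolic/numeric routines above. A and B must have identical row layouts.
*/
static PetscErrorCode ExampleTransposeMatMultDense(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);  /* symbolic + numeric */
  ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);    /* numeric only, reuses *C */
  PetscFunctionReturn(0);
}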
1989 
1990 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
1991 {
1992   PetscErrorCode        ierr;
1993   Mat                   Cdense;
1994   MPI_Comm              comm;
1995   PetscMPIInt           i, size;
1996   PetscInt              maxRows, bufsiz;
1997   Mat_MPIDense          *c;
1998   PetscMPIInt           tag;
1999   const char            *algTypes[2] = {"allgatherv","cyclic"};
2000   PetscInt              alg, nalg = 2;
2001   Mat_MatTransMultDense *abt;
2002 
2003   PetscFunctionBegin;
2004   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2005   alg = 0; /* default is allgatherv */
2006   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2007   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2008   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2009   if (A->cmap->N != B->cmap->N) {
2010     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2011   }
2012 
2013   /* create matrix product Cdense */
2014   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2015   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2016   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2017   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2018   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2019   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2020   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2021   *C   = Cdense;
2022 
2023   /* create data structure for reusing Cdense */
2024   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2025   ierr = PetscNew(&abt);CHKERRQ(ierr);
2026   abt->tag = tag;
2027   abt->alg = alg;
2028   switch (alg) {
2029   case 1:
2030     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2031     bufsiz = A->cmap->N * maxRows;
2032     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2033     break;
2034   default:
2035     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2036     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2037     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2038     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2039     break;
2040   }
2041 
2042   c                    = (Mat_MPIDense*)Cdense->data;
2043   c->abtdense          = abt;
2044   abt->destroy         = Cdense->ops->destroy;
2045   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2050 {
2051   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2052   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2053   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2054   Mat_MatTransMultDense *abt = c->abtdense;
2055   PetscErrorCode ierr;
2056   MPI_Comm       comm;
2057   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2058   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2059   PetscInt       i,cK=A->cmap->N,k,j,bn;
2060   PetscScalar    _DOne=1.0,_DZero=0.0;
2061   PetscBLASInt   cm, cn, ck;
2062   MPI_Request    reqs[2];
2063   const PetscInt *ranges;
2064 
2065   PetscFunctionBegin;
2066   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2069 
2070   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2071   bn = B->rmap->n;
2072   if (bseq->lda == bn) {
2073     sendbuf = bseq->v;
2074   } else {
2075     sendbuf = abt->buf[0];
2076     for (k = 0, i = 0; i < cK; i++) {
2077       for (j = 0; j < bn; j++, k++) {
2078         sendbuf[k] = bseq->v[i * bseq->lda + j];
2079       }
2080     }
2081   }
2082   if (size > 1) {
2083     sendto = (rank + size - 1) % size;
2084     recvfrom = (rank + size + 1) % size;
2085   } else {
2086     sendto = recvfrom = 0;
2087   }
2088   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2089   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2090   recvisfrom = rank;
2091   for (i = 0; i < size; i++) {
2092     /* we have finished the previous receive and send, so the buffers can be read/modified */
2093     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2094     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2095 
2096     if (nextrecvisfrom != rank) {
2097       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2098       sendsiz = cK * bn;
2099       recvsiz = cK * nextbn;
2100       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2101       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2102       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2103     }
2104 
2105     /* local aseq * sendbuf^T */
2106     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2107     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2108     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2109 
2110     if (nextrecvisfrom != rank) {
2111       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2112       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2113     }
2114     bn = nextbn;
2115     recvisfrom = nextrecvisfrom;
2116     sendbuf = recvbuf;
2117   }
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2122 {
2123   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2124   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2125   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2126   Mat_MatTransMultDense *abt = c->abtdense;
2127   PetscErrorCode ierr;
2128   MPI_Comm       comm;
2129   PetscMPIInt    rank,size;
2130   PetscScalar    *sendbuf, *recvbuf;
2131   PetscInt       i,cK=A->cmap->N,k,j,bn;
2132   PetscScalar    _DOne=1.0,_DZero=0.0;
2133   PetscBLASInt   cm, cn, ck;
2134 
2135   PetscFunctionBegin;
2136   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2137   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2138   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2139 
2140   /* copy transpose of B into buf[0] */
2141   bn      = B->rmap->n;
2142   sendbuf = abt->buf[0];
2143   recvbuf = abt->buf[1];
2144   for (k = 0, j = 0; j < bn; j++) {
2145     for (i = 0; i < cK; i++, k++) {
2146       sendbuf[k] = bseq->v[i * bseq->lda + j];
2147     }
2148   }
2149   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2150   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2151   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2152   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2153   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2158 {
2159   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2160   Mat_MatTransMultDense *abt = c->abtdense;
2161   PetscErrorCode ierr;
2162 
2163   PetscFunctionBegin;
2164   switch (abt->alg) {
2165   case 1:
2166     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2167     break;
2168   default:
2169     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2170     break;
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2176 {
2177   PetscErrorCode ierr;
2178 
2179   PetscFunctionBegin;
2180   if (scall == MAT_INITIAL_MATRIX) {
2181     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2182   }
2183   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
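
/*
   Illustrative sketch (not part of the original implementation): forming C = A * B^T through
   the generic MatMatTransposeMult() interface. The communication pattern used by the dense
   implementation above can be selected at run time with
   -matmattransmult_mpidense_mpidense_via allgatherv (default) or cyclic.
*/
static PetscErrorCode ExampleMatMatTransposeMultDense(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}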
2186 
2187 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2188 {
2189   PetscErrorCode   ierr;
2190   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2191   Mat_MatMultDense *ab = a->abdense;
2192 
2193   PetscFunctionBegin;
2194   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2195   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2196   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2197 
2198   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2199   ierr = PetscFree(ab);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #if defined(PETSC_HAVE_ELEMENTAL)
2204 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2205 {
2206   PetscErrorCode   ierr;
2207   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2208   Mat_MatMultDense *ab=c->abdense;
2209 
2210   PetscFunctionBegin;
2211   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2212   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2213   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2214   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2219 {
2220   PetscErrorCode   ierr;
2221   Mat              Ae,Be,Ce;
2222   Mat_MPIDense     *c;
2223   Mat_MatMultDense *ab;
2224 
2225   PetscFunctionBegin;
2226   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2227     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2228   }
2229 
2230   /* convert A and B to Elemental matrices Ae and Be */
2231   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2232   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2233 
2234   /* Ce = Ae*Be */
2235   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2236   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2237 
2238   /* convert Ce to C */
2239   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2240 
2241   /* create data structure for reusing the product matrix C */
2242   ierr = PetscNew(&ab);CHKERRQ(ierr);
2243   c                  = (Mat_MPIDense*)(*C)->data;
2244   c->abdense         = ab;
2245 
2246   ab->Ae             = Ae;
2247   ab->Be             = Be;
2248   ab->Ce             = Ce;
2249   ab->destroy        = (*C)->ops->destroy;
2250   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2251   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2256 {
2257   PetscErrorCode ierr;
2258 
2259   PetscFunctionBegin;
2260   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2261     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2262     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2263     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2264   } else {
2265     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2266     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2267     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2268   }
2269   PetscFunctionReturn(0);
2270 }
2271 #endif
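
/*
   Illustrative sketch (not part of the original implementation): C = A * B for two MATMPIDENSE
   matrices through the generic MatMatMult() interface. The dense-dense product above is only
   available when PETSc is configured with Elemental, hence the guard.
*/
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode ExampleMatMatMultDense(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);  /* symbolic + numeric */
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);    /* numeric only, reuses *C */
  PetscFunctionReturn(0);
}
#endif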
2272 
2273