xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5b6bfdb9644f185dbf5e5a09b808ec241507e1e7)

/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
              matrix that stores the locally owned part of the operator. For a sequential dense matrix it returns the matrix itself.

    Input Parameter:
.      A - the sequential or parallel (MPI) dense matrix

    Output Parameter:
.      B - the inner sequential dense matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
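
/*
   Usage sketch (illustrative only; M and N stand for the global sizes): obtain
   the sequential matrix holding this process's rows of a parallel dense matrix.
   The result is a reference into A and is not destroyed by the caller.

      Mat A,Aloc;
      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);   // Aloc is the local MATSEQDENSE block
*/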

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr); /* composing took a reference, so the block stays alive, cached on A */
  } else *a = B;
  PetscFunctionReturn(0);
}
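
/*
   Usage sketch (illustrative only): extract the square diagonal block owned by
   this process. The block is cached on A, so the caller does not destroy it.

      Mat Ad;
      ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);   // rows/columns [rstart,rend) of A
*/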

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) { /* locally owned row: set directly in the sequential block */
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) { /* off-process row: stash it; it is communicated at assembly time */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
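
/*
   Usage sketch (illustrative only; the indices are assumed to be valid global
   indices): values destined for locally owned rows are inserted directly above,
   while values for off-process rows are stashed and communicated during assembly.

      PetscInt    row = 0,col = 0;   // row may be owned by another process
      PetscScalar v   = 1.0;
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/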

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
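
/*
   Usage sketch (illustrative only; m is the local row count, N the global
   column count, and buf a user allocation of at least m*N scalars): temporarily
   replace the storage of the local block, then restore the original array.

      PetscScalar *buf;
      ierr = PetscMalloc1(m*N,&buf);CHKERRQ(ierr);
      ierr = MatDensePlaceArray(A,buf);CHKERRQ(ierr);   // A now reads/writes buf (column-major)
      ierr = MatDenseResetArray(A);CHKERRQ(ierr);       // back to the original storage
      ierr = PetscFree(buf);CHKERRQ(ierr);
*/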

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! Each index set should really be
     checked to confirm that it is OK. Currently this only supports a submatrix with the
     same row partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
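
/*
   Usage sketch (illustrative only; rstart, cstart, and ncloc stand for this
   process's row start, column start, and local column count): extract a
   submatrix. Per the note above, the row index set must follow the existing
   row partitioning; the concatenation of the iscol pieces across processes
   selects the global columns.

      IS  isrow,iscol;
      Mat B;
      ierr = ISCreateStride(PETSC_COMM_WORLD,2,rstart,1,&isrow);CHKERRQ(ierr);     // two locally owned rows
      ierr = ISCreateStride(PETSC_COMM_WORLD,ncloc,cstart,1,&iscol);CHKERRQ(ierr); // the locally owned columns
      ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/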

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in the same insert mode (INSERT_VALUES or ADD_VALUES);
     because the two modes have distinct bit values, the bitwise OR across all ranks equals
     (ADD_VALUES|INSERT_VALUES) exactly when both modes were used */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive values belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
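
/*
   Usage sketch (illustrative only): zero two global rows and place 1.0 on their
   diagonal entries. Per the note above, this implementation requires a square
   matrix with identical row and column ownership; any process may list any rows,
   and the indices are shipped to their owners.

      PetscInt rows[2] = {0,1};   // global row indices
      ierr = MatZeroRows(A,2,rows,1.0,NULL,NULL);CHKERRQ(ierr);
*/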

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* gather the distributed x into the sequential work vector, then apply the local block */
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  /* apply the transpose of the local block, then sum the partial results across processes */
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
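
/*
   Usage sketch (illustrative only): form an explicit transpose, or transpose a
   square matrix in-place.

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);   // requires A square
*/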


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
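
/*
   Usage sketch (illustrative only): compute the 2-norm of every column; each
   process receives the full array of global column norms.

      PetscReal *norms;
      PetscInt  N;
      ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
      ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
      ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
      ierr = PetscFree(norms);CHKERRQ(ierr);
*/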

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: if data is supplied, this assumes the user has correctly allocated
     the local dense storage space; error checking should be added. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
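
/*
   Usage sketch (illustrative only; m,n,M,N stand for the desired local and
   global sizes): the typical creation sequence for a MATMPIDENSE matrix.
   Passing NULL for the data pointer lets PETSc allocate the local
   column-major storage.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/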

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* the PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
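
/*
   Usage sketch (illustrative only; mlocal stands for the number of locally
   owned rows): get write access to the local part of one global column, then
   restore it.

      PetscScalar *col;
      PetscInt    i;
      ierr = MatDenseGetColumn(A,0,&col);CHKERRQ(ierr);   // local entries of global column 0
      for (i=0; i<mlocal; i++) col[i] = 0.0;
      ierr = MatDenseRestoreColumn(A,&col);CHKERRQ(ierr);
*/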

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1353 
1354 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1355 {
1356   Mat_MPIDense   *a;
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1361   mat->data = (void*)a;
1362   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1363 
1364   mat->insertmode = NOT_SET_VALUES;
1365   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1366   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1367 
1368   /* build cache for off-processor array entries */
1369   a->donotstash = PETSC_FALSE;
1370 
1371   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1372 
1373   /* data structures used for matrix-vector multiply */
1374   a->lvec        = 0;
1375   a->Mvctx       = 0;
1376   a->roworiented = PETSC_TRUE;
1377 
1378   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1379   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1380   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1381   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1382   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1384 #if defined(PETSC_HAVE_ELEMENTAL)
1385   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1386 #endif
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1388   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1390   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1391 
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1394   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1395   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1396   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1397   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 /*MC
1402    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1403 
1404    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1405    and MATMPIDENSE otherwise.
1406 
1407    Options Database Keys:
1408 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1409 
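   Example usage (a minimal sketch; M and N stand for global sizes chosen by the caller):
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);  /* or call MatSetFromOptions() and run with -mat_type dense */
   ierr = MatSetUp(A);CHKERRQ(ierr);
.ve
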
1410   Level: beginner
1411 
1413 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1414 M*/
1415 
1416 /*@C
1417    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1418 
1419    Not collective
1420 
1421    Input Parameters:
1422 +  B - the matrix
1423 -  data - optional location of matrix data.  Set data=NULL for PETSc
1424    to control all matrix memory allocation.
1425 
1426    Notes:
1427    The dense format is fully compatible with standard Fortran 77
1428    storage by columns.
1429 
1430    The data input variable is intended primarily for Fortran programmers
1431    who wish to allocate their own matrix memory space.  Most users should
1432    set data=NULL.
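
   Example usage (a minimal sketch; m, n, M, N stand for sizes chosen by the caller):
.vb
   Mat B;
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
   ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);  /* PETSc allocates and manages the storage */
.ve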
1433 
1434    Level: intermediate
1435 
1436 .keywords: matrix,dense, parallel
1437 
1438 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1439 @*/
1440 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1441 {
1442   PetscErrorCode ierr;
1443 
1444   PetscFunctionBegin;
1445   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 /*@
1450    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1451    array provided by the user. This is useful to avoid copying an array
1452    into a matrix.
1453 
1454    Not Collective
1455 
1456    Input Parameters:
1457 +  mat - the matrix
1458 -  array - the array in column major order
1459 
1460    Notes:
1461    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1462    freed when the matrix is destroyed.
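
   Example usage (a minimal sketch; mydata stands for a user-owned, column-major buffer with at least (local rows)*(global columns) entries):
.vb
   ierr = MatDensePlaceArray(mat,mydata);CHKERRQ(ierr);
   /* ... operations on mat now use mydata directly, without a copy ... */
   ierr = MatDenseResetArray(mat);CHKERRQ(ierr);  /* mat returns to its original array; the caller still owns mydata */
.ve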
1463 
1464    Level: developer
1465 
1466 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1467 
1468 @*/
1469 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1470 {
1471   PetscErrorCode ierr;
1472   PetscFunctionBegin;
1473   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1474   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 /*@
1479    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1480 
1481    Not Collective
1482 
1483    Input Parameter:
1484 .  mat - the matrix
1485 
1486    Notes:
1487    You can only call this after a call to MatDensePlaceArray()
1488 
1489    Level: developer
1490 
1491 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1492 
1493 @*/
1494 PetscErrorCode  MatDenseResetArray(Mat mat)
1495 {
1496   PetscErrorCode ierr;
1497   PetscFunctionBegin;
1498   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1499   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 /*@C
1504    MatCreateDense - Creates a parallel matrix in dense format.
1505 
1506    Collective on MPI_Comm
1507 
1508    Input Parameters:
1509 +  comm - MPI communicator
1510 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1511 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1512 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1513 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1514 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1515    to control all matrix memory allocation.
1516 
1517    Output Parameter:
1518 .  A - the matrix
1519 
1520    Notes:
1521    The dense format is fully compatible with standard Fortran 77
1522    storage by columns.
1523 
1524    The data input variable is intended primarily for Fortran programmers
1525    who wish to allocate their own matrix memory space.  Most users should
1526    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1527 
1528    The user MUST specify either the local or global matrix dimensions
1529    (possibly both).
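
   Example usage (a minimal sketch with PETSc-managed storage; M and N stand for global sizes chosen by the caller):
.vb
   Mat A;
   ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
   /* ... fill entries with MatSetValues() ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve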
1530 
1531    Level: intermediate
1532 
1533 .keywords: matrix,dense, parallel
1534 
1535 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1536 @*/
1537 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1538 {
1539   PetscErrorCode ierr;
1540   PetscMPIInt    size;
1541 
1542   PetscFunctionBegin;
1543   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1544   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1545   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1546   if (size > 1) {
1547     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1548     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1549     if (data) {  /* user provided data array, so no need to assemble */
1550       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1551       (*A)->assembled = PETSC_TRUE;
1552     }
1553   } else {
1554     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1555     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1556   }
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1561 {
1562   Mat            mat;
1563   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1564   PetscErrorCode ierr;
1565 
1566   PetscFunctionBegin;
1567   *newmat = 0;
1568   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1569   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1570   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1571   a       = (Mat_MPIDense*)mat->data;
1572   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1573 
1574   mat->factortype   = A->factortype;
1575   mat->assembled    = PETSC_TRUE;
1576   mat->preallocated = PETSC_TRUE;
1577 
1578   a->size         = oldmat->size;
1579   a->rank         = oldmat->rank;
1580   mat->insertmode = NOT_SET_VALUES;
1581   a->nvec         = oldmat->nvec;
1582   a->donotstash   = oldmat->donotstash;
1583 
1584   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1585   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1586 
1587   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1588   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1589   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1590 
1591   *newmat = mat;
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1596 {
1597   PetscErrorCode ierr;
1598   PetscMPIInt    rank,size;
1599   const PetscInt *rowners;
1600   PetscInt       i,m,n,nz,j,mMax;
1601   PetscScalar    *array,*vals,*vals_ptr;
1602   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1603 
1604   PetscFunctionBegin;
1605   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1606   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1607 
1608   /* determine ownership of rows and columns */
1609   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1610   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1611 
1612   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1613   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1614     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1615   }
1616   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1617   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1618   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1619   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
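  /* mMax (significant only on rank 0) is the largest local row count over all processes; rank 0 uses it to size a staging buffer big enough for any process's block */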
1620   if (!rank) {
1621     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1622 
1623     /* read in my part of the matrix numerical values  */
1624     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1625 
1626     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1627     vals_ptr = vals;
1628     for (i=0; i<m; i++) {
1629       for (j=0; j<N; j++) {
1630         array[i + j*m] = *vals_ptr++;
1631       }
1632     }
1633 
1634     /* read in the other processes' parts and ship them out */
1635     for (i=1; i<size; i++) {
1636       nz   = (rowners[i+1] - rowners[i])*N;
1637       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1638       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1639     }
1640   } else {
1641     /* receive numeric values */
1642     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1643 
1644     /* receive message of values */
1645     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1646 
1647     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1648     vals_ptr = vals;
1649     for (i=0; i<m; i++) {
1650       for (j=0; j<N; j++) {
1651         array[i + j*m] = *vals_ptr++;
1652       }
1653     }
1654   }
1655   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1656   ierr = PetscFree(vals);CHKERRQ(ierr);
1657   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1658   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1663 {
1664   Mat_MPIDense   *a;
1665   PetscScalar    *vals,*svals;
1666   MPI_Comm       comm;
1667   MPI_Status     status;
1668   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1669   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1670   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1671   PetscInt       i,nz,j,rstart,rend;
1672   int            fd;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   /* force binary viewer to load .info file if it has not yet done so */
1677   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1678   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1679   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1680   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1681   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1682   if (!rank) {
1683     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1684     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
1685   }
1686   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1687   M    = header[1]; N = header[2]; nz = header[3];
1688 
1689   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1690   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1691   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1692 
1693   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1694   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1695 
1696   /*
1697        Handle case where matrix is stored on disk as a dense matrix
1698   */
1699   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1700     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1701     PetscFunctionReturn(0);
1702   }
1703 
1704   /* determine ownership of all rows */
1705   if (newmat->rmap->n < 0) {
1706     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1707   } else {
1708     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1709   }
1710   if (newmat->cmap->n < 0) {
1711     n = PETSC_DECIDE;
1712   } else {
1713     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1714   }
1715 
1716   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1717   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1718   rowners[0] = 0;
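  /* prefix-sum the gathered local row counts so rowners[i] becomes the first global row owned by process i */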
1719   for (i=2; i<=size; i++) {
1720     rowners[i] += rowners[i-1];
1721   }
1722   rstart = rowners[rank];
1723   rend   = rowners[rank+1];
1724 
1725   /* distribute row lengths to all processors */
1726   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1727   if (!rank) {
1728     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1729     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1730     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1731     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1732     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1733     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1734   } else {
1735     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1736   }
1737 
1738   if (!rank) {
1739     /* calculate the number of nonzeros on each processor */
1740     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1741     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1742     for (i=0; i<size; i++) {
1743       for (j=rowners[i]; j< rowners[i+1]; j++) {
1744         procsnz[i] += rowlengths[j];
1745       }
1746     }
1747     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1748 
1749     /* determine max buffer needed and allocate it */
1750     maxnz = 0;
1751     for (i=0; i<size; i++) {
1752       maxnz = PetscMax(maxnz,procsnz[i]);
1753     }
1754     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1755 
1756     /* read in my part of the matrix column indices  */
1757     nz   = procsnz[0];
1758     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1759     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1760 
1761     /* read in everyone else's parts and ship them off */
1762     for (i=1; i<size; i++) {
1763       nz   = procsnz[i];
1764       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1765       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1766     }
1767     ierr = PetscFree(cols);CHKERRQ(ierr);
1768   } else {
1769     /* determine buffer space needed for message */
1770     nz = 0;
1771     for (i=0; i<m; i++) {
1772       nz += ourlens[i];
1773     }
1774     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1775 
1776     /* receive message of column indices */
1777     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1778     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1779     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1780   }
1781 
1782   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1783   a = (Mat_MPIDense*)newmat->data;
1784   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1785     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1786   }
1787 
1788   if (!rank) {
1789     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1790 
1791     /* read in my part of the matrix numerical values  */
1792     nz   = procsnz[0];
1793     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1794 
1795     /* insert into matrix */
1796     jj      = rstart;
1797     smycols = mycols;
1798     svals   = vals;
1799     for (i=0; i<m; i++) {
1800       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1801       smycols += ourlens[i];
1802       svals   += ourlens[i];
1803       jj++;
1804     }
1805 
1806     /* read in the other processes' parts and ship them out */
1807     for (i=1; i<size; i++) {
1808       nz   = procsnz[i];
1809       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1810       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1811     }
1812     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1813   } else {
1814     /* receive numeric values */
1815     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1816 
1817     /* receive message of values */
1818     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1819     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1820     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1821 
1822     /* insert into matrix */
1823     jj      = rstart;
1824     smycols = mycols;
1825     svals   = vals;
1826     for (i=0; i<m; i++) {
1827       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1828       smycols += ourlens[i];
1829       svals   += ourlens[i];
1830       jj++;
1831     }
1832   }
1833   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1834   ierr = PetscFree(vals);CHKERRQ(ierr);
1835   ierr = PetscFree(mycols);CHKERRQ(ierr);
1836   ierr = PetscFree(rowners);CHKERRQ(ierr);
1837 
1838   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1839   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1844 {
1845   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1846   Mat            a,b;
1847   PetscBool      flg;
1848   PetscErrorCode ierr;
1849 
1850   PetscFunctionBegin;
1851   a    = matA->A;
1852   b    = matB->A;
1853   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1854   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1859 {
1860   PetscErrorCode        ierr;
1861   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1862   Mat_TransMatMultDense *atb = a->atbdense;
1863 
1864   PetscFunctionBegin;
1865   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1866   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1867   ierr = PetscFree(atb);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1872 {
1873   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1874   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1875   Mat_TransMatMultDense *atb = c->atbdense;
1876   PetscErrorCode ierr;
1877   MPI_Comm       comm;
1878   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1879   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1880   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1881   PetscScalar    _DOne=1.0,_DZero=0.0;
1882   PetscBLASInt   am,an,bn,aN;
1883   const PetscInt *ranges;
1884 
1885   PetscFunctionBegin;
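  /* Algorithm: each process computes a full-size partial product aseq^T * bseq (all of C), then
     MPI_Reduce_scatter sums these partial products and leaves each process with only its own rows of C */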
1886   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1889 
1890   /* compute atbarray = aseq^T * bseq */
1891   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1892   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1893   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1894   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1895   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1896 
1897   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1898   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1899 
1900   /* arrange atbarray into sendbuf */
1901   k = 0;
1902   for (proc=0; proc<size; proc++) {
1903     for (j=0; j<cN; j++) {
1904       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1905     }
1906   }
1907   /* sum the atbarray contributions into the local values of C */
1908   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1909   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1910   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1915 {
1916   PetscErrorCode        ierr;
1917   Mat                   Cdense;
1918   MPI_Comm              comm;
1919   PetscMPIInt           size;
1920   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1921   Mat_MPIDense          *c;
1922   Mat_TransMatMultDense *atb;
1923 
1924   PetscFunctionBegin;
1925   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1926   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1927     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1928   }
1929 
1930   /* create matrix product Cdense */
1931   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1932   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1933   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1934   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1935   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1936   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1937   *C   = Cdense;
1938 
1939   /* create data structure so Cdense can be reused */
1940   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1941   ierr = PetscNew(&atb);CHKERRQ(ierr);
1942   cM = Cdense->rmap->N;
1943   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1944 
1945   c                    = (Mat_MPIDense*)Cdense->data;
1946   c->atbdense          = atb;
1947   atb->destroy         = Cdense->ops->destroy;
1948   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1953 {
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   if (scall == MAT_INITIAL_MATRIX) {
1958     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1959   }
1960   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1965 {
1966   PetscErrorCode   ierr;
1967   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1968   Mat_MatMultDense *ab = a->abdense;
1969 
1970   PetscFunctionBegin;
1971   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1972   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1973   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1974 
1975   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1976   ierr = PetscFree(ab);CHKERRQ(ierr);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #if defined(PETSC_HAVE_ELEMENTAL)
1981 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1982 {
1983   PetscErrorCode   ierr;
1984   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1985   Mat_MatMultDense *ab=c->abdense;
1986 
1987   PetscFunctionBegin;
1988   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1989   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1990   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1991   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1996 {
1997   PetscErrorCode   ierr;
1998   Mat              Ae,Be,Ce;
1999   Mat_MPIDense     *c;
2000   Mat_MatMultDense *ab;
2001 
2002   PetscFunctionBegin;
2003   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2004     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2005   }
2006 
2007   /* convert A and B to Elemental matrices Ae and Be */
2008   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2009   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2010 
2011   /* Ce = Ae*Be */
2012   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2013   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2014 
2015   /* convert Ce to C */
2016   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2017 
2018   /* create data structure so the Elemental matrices can be reused */
2019   ierr = PetscNew(&ab);CHKERRQ(ierr);
2020   c                  = (Mat_MPIDense*)(*C)->data;
2021   c->abdense         = ab;
2022 
2023   ab->Ae             = Ae;
2024   ab->Be             = Be;
2025   ab->Ce             = Ce;
2026   ab->destroy        = (*C)->ops->destroy;
2027   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2028   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2033 {
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2038     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2039     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2040     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2041   } else {
2042     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2043     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2044     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2045   }
2046   PetscFunctionReturn(0);
2047 }
2048 #endif
2049 
2050