xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 55e7fe800d976e85ed2b5cd8bfdef564daa37bd9)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62 {
63   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
66   PetscScalar    *array;
67   MPI_Comm       comm;
68   Mat            B;
69 
70   PetscFunctionBegin;
71   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72 
73   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
74   if (!B) {
75     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
76     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
77     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
78     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
79     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
80     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
81     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
82     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
84     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
85     *a   = B;
86     ierr = MatDestroy(&B);CHKERRQ(ierr);
87   } else *a = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92 {
93   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96   PetscBool      roworiented = A->roworiented;
97 
98   PetscFunctionBegin;
99   for (i=0; i<m; i++) {
100     if (idxm[i] < 0) continue;
101     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102     if (idxm[i] >= rstart && idxm[i] < rend) {
103       row = idxm[i] - rstart;
104       if (roworiented) {
105         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
106       } else {
107         for (j=0; j<n; j++) {
108           if (idxn[j] < 0) continue;
109           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
111         }
112       }
113     } else if (!A->donotstash) {
114       mat->assembled = PETSC_FALSE;
115       if (roworiented) {
116         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117       } else {
118         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
119       }
120     }
121   }
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188 {
189   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
190   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
191   PetscErrorCode ierr;
192   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
193   const PetscInt *irow,*icol;
194   PetscScalar    *av,*bv,*v = lmat->v;
195   Mat            newmat;
196   IS             iscol_local;
197   MPI_Comm       comm_is,comm_mat;
198 
199   PetscFunctionBegin;
200   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
201   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
202   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
203 
204   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
205   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
206   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
207   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
208   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
209   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
210 
211   /* No parallel redistribution currently supported! Should really check each index set
212      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
213      original matrix! */
214 
215   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
216   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
217 
218   /* Check submatrix call */
219   if (scall == MAT_REUSE_MATRIX) {
220     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
221     /* Really need to test rows and column sizes! */
222     newmat = *B;
223   } else {
224     /* Create and fill new matrix */
225     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
226     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
227     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
228     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
229   }
230 
231   /* Now extract the data pointers and do the copy, column at a time */
232   newmatd = (Mat_MPIDense*)newmat->data;
233   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
234 
235   for (i=0; i<Ncols; i++) {
236     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
237     for (j=0; j<nrows; j++) {
238       *bv++ = av[irow[j] - rstart];
239     }
240   }
241 
242   /* Assemble the matrices so that the correct flags are set */
243   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
245 
246   /* Free work space */
247   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
248   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
249   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
250   *B   = newmat;
251   PetscFunctionReturn(0);
252 }
253 
254 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
255 {
256   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
257   PetscErrorCode ierr;
258 
259   PetscFunctionBegin;
260   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
275 {
276   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
277   MPI_Comm       comm;
278   PetscErrorCode ierr;
279   PetscInt       nstash,reallocs;
280   InsertMode     addv;
281 
282   PetscFunctionBegin;
283   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
284   /* make sure all processors are either in INSERTMODE or ADDMODE */
285   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
286   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
287   mat->insertmode = addv; /* in case this processor had no cache */
288 
289   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
290   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
291   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
296 {
297   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
298   PetscErrorCode ierr;
299   PetscInt       i,*row,*col,flg,j,rstart,ncols;
300   PetscMPIInt    n;
301   PetscScalar    *val;
302 
303   PetscFunctionBegin;
304   /*  wait on receives */
305   while (1) {
306     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
307     if (!flg) break;
308 
309     for (i=0; i<n;) {
310       /* Now identify the consecutive vals belonging to the same row */
311       for (j=i,rstart=row[j]; j<n; j++) {
312         if (row[j] != rstart) break;
313       }
314       if (j < n) ncols = j-i;
315       else       ncols = n-i;
316       /* Now assemble all these values with a single function call */
317       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
318       i    = j;
319     }
320   }
321   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
322 
323   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
324   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
325 
326   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
327     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
328   }
329   PetscFunctionReturn(0);
330 }
331 
332 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333 {
334   PetscErrorCode ierr;
335   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
336 
337   PetscFunctionBegin;
338   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
342 /* the code does not do the diagonal entries correctly unless the
343    matrix is square and the column and row owerships are identical.
344    This is a BUG. The only way to fix it seems to be to access
345    mdn->A and mdn->B directly and not through the MatZeroRows()
346    routine.
347 */
348 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
349 {
350   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
351   PetscErrorCode    ierr;
352   PetscInt          i,*owners = A->rmap->range;
353   PetscInt          *sizes,j,idx,nsends;
354   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
355   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
356   PetscInt          *lens,*lrows,*values;
357   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
358   MPI_Comm          comm;
359   MPI_Request       *send_waits,*recv_waits;
360   MPI_Status        recv_status,*send_status;
361   PetscBool         found;
362   const PetscScalar *xx;
363   PetscScalar       *bb;
364 
365   PetscFunctionBegin;
366   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
367   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
368   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
369   /*  first count number of contributors to each processor */
370   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
371   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
372   for (i=0; i<N; i++) {
373     idx   = rows[i];
374     found = PETSC_FALSE;
375     for (j=0; j<size; j++) {
376       if (idx >= owners[j] && idx < owners[j+1]) {
377         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
378       }
379     }
380     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
381   }
382   nsends = 0;
383   for (i=0; i<size; i++) nsends += sizes[2*i+1];
384 
385   /* inform other processors of number of messages and max length*/
386   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
387 
388   /* post receives:   */
389   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
390   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
391   for (i=0; i<nrecvs; i++) {
392     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
393   }
394 
395   /* do sends:
396       1) starts[i] gives the starting index in svalues for stuff going to
397          the ith processor
398   */
399   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
400   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
401   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
402 
403   starts[0] = 0;
404   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
405   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
406 
407   starts[0] = 0;
408   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
409   count = 0;
410   for (i=0; i<size; i++) {
411     if (sizes[2*i+1]) {
412       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
413     }
414   }
415   ierr = PetscFree(starts);CHKERRQ(ierr);
416 
417   base = owners[rank];
418 
419   /*  wait on receives */
420   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
421   count = nrecvs;
422   slen  = 0;
423   while (count) {
424     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
425     /* unpack receives into our local space */
426     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
427 
428     source[imdex] = recv_status.MPI_SOURCE;
429     lens[imdex]   = n;
430     slen += n;
431     count--;
432   }
433   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
434 
435   /* move the data into the send scatter */
436   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
437   count = 0;
438   for (i=0; i<nrecvs; i++) {
439     values = rvalues + i*nmax;
440     for (j=0; j<lens[i]; j++) {
441       lrows[count++] = values[j] - base;
442     }
443   }
444   ierr = PetscFree(rvalues);CHKERRQ(ierr);
445   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
446   ierr = PetscFree(owner);CHKERRQ(ierr);
447   ierr = PetscFree(sizes);CHKERRQ(ierr);
448 
449   /* fix right hand side if needed */
450   if (x && b) {
451     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
452     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
453     for (i=0; i<slen; i++) {
454       bb[lrows[i]] = diag*xx[lrows[i]];
455     }
456     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
457     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
458   }
459 
460   /* actually zap the local rows */
461   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
462   if (diag != 0.0) {
463     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
464     PetscInt     m   = ll->lda, i;
465 
466     for (i=0; i<slen; i++) {
467       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
468     }
469   }
470   ierr = PetscFree(lrows);CHKERRQ(ierr);
471 
472   /* wait on sends */
473   if (nsends) {
474     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
475     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
476     ierr = PetscFree(send_status);CHKERRQ(ierr);
477   }
478   ierr = PetscFree(send_waits);CHKERRQ(ierr);
479   ierr = PetscFree(svalues);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
484 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
485 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
486 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
487 
488 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
489 {
490   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
491   PetscErrorCode ierr;
492 
493   PetscFunctionBegin;
494   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
495   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
501 {
502   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
503   PetscErrorCode ierr;
504 
505   PetscFunctionBegin;
506   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
507   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
508   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
513 {
514   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
515   PetscErrorCode ierr;
516   PetscScalar    zero = 0.0;
517 
518   PetscFunctionBegin;
519   ierr = VecSet(yy,zero);CHKERRQ(ierr);
520   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
521   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
522   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
527 {
528   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
529   PetscErrorCode ierr;
530 
531   PetscFunctionBegin;
532   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
533   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
534   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
535   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
542   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
543   PetscErrorCode ierr;
544   PetscInt       len,i,n,m = A->rmap->n,radd;
545   PetscScalar    *x,zero = 0.0;
546 
547   PetscFunctionBegin;
548   ierr = VecSet(v,zero);CHKERRQ(ierr);
549   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
550   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
551   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
553   radd = A->rmap->rstart*m;
554   for (i=0; i<len; i++) {
555     x[i] = aloc->v[radd + i*m + i];
556   }
557   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 PetscErrorCode MatDestroy_MPIDense(Mat mat)
562 {
563   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
564   PetscErrorCode ierr;
565 
566   PetscFunctionBegin;
567 #if defined(PETSC_USE_LOG)
568   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
569 #endif
570   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
571   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
572   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
573   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
574 
575   ierr = PetscFree(mat->data);CHKERRQ(ierr);
576   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
577 
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
580   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
581   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
582   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
584 #if defined(PETSC_HAVE_ELEMENTAL)
585   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
586 #endif
587   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
592   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
593   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
594   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
595   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
600 {
601   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
602   PetscErrorCode    ierr;
603   PetscViewerFormat format;
604   int               fd;
605   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
606   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
607   PetscScalar       *work,*v,*vv;
608   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
609 
610   PetscFunctionBegin;
611   if (mdn->size == 1) {
612     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
613   } else {
614     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
615     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
616     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
617 
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_NATIVE) {
620 
621       if (!rank) {
622         /* store the matrix as a dense matrix */
623         header[0] = MAT_FILE_CLASSID;
624         header[1] = mat->rmap->N;
625         header[2] = N;
626         header[3] = MATRIX_BINARY_FORMAT_DENSE;
627         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
628 
629         /* get largest work array needed for transposing array */
630         mmax = mat->rmap->n;
631         for (i=1; i<size; i++) {
632           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
633         }
634         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
635 
636         /* write out local array, by rows */
637         m = mat->rmap->n;
638         v = a->v;
639         for (j=0; j<N; j++) {
640           for (i=0; i<m; i++) {
641             work[j + i*N] = *v++;
642           }
643         }
644         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
645         /* get largest work array to receive messages from other processes, excludes process zero */
646         mmax = 0;
647         for (i=1; i<size; i++) {
648           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
649         }
650         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
651         for (k = 1; k < size; k++) {
652           v    = vv;
653           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
654           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
655 
656           for (j = 0; j < N; j++) {
657             for (i = 0; i < m; i++) {
658               work[j + i*N] = *v++;
659             }
660           }
661           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
662         }
663         ierr = PetscFree(work);CHKERRQ(ierr);
664         ierr = PetscFree(vv);CHKERRQ(ierr);
665       } else {
666         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
667       }
668     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
669   }
670   PetscFunctionReturn(0);
671 }
672 
673 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
674 #include <petscdraw.h>
675 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
676 {
677   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
678   PetscErrorCode    ierr;
679   PetscMPIInt       rank = mdn->rank;
680   PetscViewerType   vtype;
681   PetscBool         iascii,isdraw;
682   PetscViewer       sviewer;
683   PetscViewerFormat format;
684 
685   PetscFunctionBegin;
686   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
688   if (iascii) {
689     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
690     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
691     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
692       MatInfo info;
693       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
694       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
695       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
697       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
698       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_INFO) {
701       PetscFunctionReturn(0);
702     }
703   } else if (isdraw) {
704     PetscDraw draw;
705     PetscBool isnull;
706 
707     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
708     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
709     if (isnull) PetscFunctionReturn(0);
710   }
711 
712   {
713     /* assemble the entire matrix onto first processor. */
714     Mat         A;
715     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
716     PetscInt    *cols;
717     PetscScalar *vals;
718 
719     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
722     } else {
723       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
724     }
725     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
726     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
727     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
728     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
729 
730     /* Copy the matrix ... This isn't the most efficient means,
731        but it's quick for now */
732     A->insertmode = INSERT_VALUES;
733 
734     row = mat->rmap->rstart;
735     m   = mdn->A->rmap->n;
736     for (i=0; i<m; i++) {
737       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
738       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
739       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
740       row++;
741     }
742 
743     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
744     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
746     if (!rank) {
747       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
748       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
749     }
750     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
751     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
752     ierr = MatDestroy(&A);CHKERRQ(ierr);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
758 {
759   PetscErrorCode ierr;
760   PetscBool      iascii,isbinary,isdraw,issocket;
761 
762   PetscFunctionBegin;
763   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
764   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
765   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
766   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
767 
768   if (iascii || issocket || isdraw) {
769     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
770   } else if (isbinary) {
771     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
777 {
778   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
779   Mat            mdn  = mat->A;
780   PetscErrorCode ierr;
781   PetscReal      isend[5],irecv[5];
782 
783   PetscFunctionBegin;
784   info->block_size = 1.0;
785 
786   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
787 
788   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
789   isend[3] = info->memory;  isend[4] = info->mallocs;
790   if (flag == MAT_LOCAL) {
791     info->nz_used      = isend[0];
792     info->nz_allocated = isend[1];
793     info->nz_unneeded  = isend[2];
794     info->memory       = isend[3];
795     info->mallocs      = isend[4];
796   } else if (flag == MAT_GLOBAL_MAX) {
797     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
798 
799     info->nz_used      = irecv[0];
800     info->nz_allocated = irecv[1];
801     info->nz_unneeded  = irecv[2];
802     info->memory       = irecv[3];
803     info->mallocs      = irecv[4];
804   } else if (flag == MAT_GLOBAL_SUM) {
805     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
806 
807     info->nz_used      = irecv[0];
808     info->nz_allocated = irecv[1];
809     info->nz_unneeded  = irecv[2];
810     info->memory       = irecv[3];
811     info->mallocs      = irecv[4];
812   }
813   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
814   info->fill_ratio_needed = 0;
815   info->factor_mallocs    = 0;
816   PetscFunctionReturn(0);
817 }
818 
819 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
820 {
821   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   switch (op) {
826   case MAT_NEW_NONZERO_LOCATIONS:
827   case MAT_NEW_NONZERO_LOCATION_ERR:
828   case MAT_NEW_NONZERO_ALLOCATION_ERR:
829     MatCheckPreallocated(A,1);
830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
831     break;
832   case MAT_ROW_ORIENTED:
833     MatCheckPreallocated(A,1);
834     a->roworiented = flg;
835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
836     break;
837   case MAT_NEW_DIAGONALS:
838   case MAT_KEEP_NONZERO_PATTERN:
839   case MAT_USE_HASH_TABLE:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   case MAT_IGNORE_OFF_PROC_ENTRIES:
843     a->donotstash = flg;
844     break;
845   case MAT_SYMMETRIC:
846   case MAT_STRUCTURALLY_SYMMETRIC:
847   case MAT_HERMITIAN:
848   case MAT_SYMMETRY_ETERNAL:
849   case MAT_IGNORE_LOWER_TRIANGULAR:
850     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
851     break;
852   default:
853     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 
859 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860 {
861   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
862   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
863   const PetscScalar *l,*r;
864   PetscScalar       x,*v;
865   PetscErrorCode    ierr;
866   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
867 
868   PetscFunctionBegin;
869   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
870   if (ll) {
871     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
872     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
873     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
874     for (i=0; i<m; i++) {
875       x = l[i];
876       v = mat->v + i;
877       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
878     }
879     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
880     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
881   }
882   if (rr) {
883     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
884     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
885     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
888     for (i=0; i<n; i++) {
889       x = r[i];
890       v = mat->v + i*m;
891       for (j=0; j<m; j++) (*v++) *= x;
892     }
893     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
894     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
895   }
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
900 {
901   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
902   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
903   PetscErrorCode ierr;
904   PetscInt       i,j;
905   PetscReal      sum = 0.0;
906   PetscScalar    *v  = mat->v;
907 
908   PetscFunctionBegin;
909   if (mdn->size == 1) {
910     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
911   } else {
912     if (type == NORM_FROBENIUS) {
913       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
915       }
916       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
917       *nrm = PetscSqrtReal(*nrm);
918       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
919     } else if (type == NORM_1) {
920       PetscReal *tmp,*tmp2;
921       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
922       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
923       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
924       *nrm = 0.0;
925       v    = mat->v;
926       for (j=0; j<mdn->A->cmap->n; j++) {
927         for (i=0; i<mdn->A->rmap->n; i++) {
928           tmp[j] += PetscAbsScalar(*v);  v++;
929         }
930       }
931       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
932       for (j=0; j<A->cmap->N; j++) {
933         if (tmp2[j] > *nrm) *nrm = tmp2[j];
934       }
935       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
936       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
937     } else if (type == NORM_INFINITY) { /* max row norm */
938       PetscReal ntemp;
939       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
940       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
947 {
948   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
949   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
950   Mat            B;
951   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
952   PetscErrorCode ierr;
953   PetscInt       j,i;
954   PetscScalar    *v;
955 
956   PetscFunctionBegin;
957   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
958     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
959     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
960     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
961     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
962   } else {
963     B = *matout;
964   }
965 
966   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
967   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
968   for (i=0; i<m; i++) rwork[i] = rstart + i;
969   for (j=0; j<n; j++) {
970     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
971     v   += m;
972   }
973   ierr = PetscFree(rwork);CHKERRQ(ierr);
974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
977     *matout = B;
978   } else {
979     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 
985 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
986 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
987 
988 PetscErrorCode MatSetUp_MPIDense(Mat A)
989 {
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998 {
999   PetscErrorCode ierr;
1000   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1001 
1002   PetscFunctionBegin;
1003   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1004   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1009 {
1010   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 PetscErrorCode MatRealPart_MPIDense(Mat A)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1029 {
1030   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1039 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1040 {
1041   PetscErrorCode ierr;
1042   PetscInt       i,n;
1043   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1044   PetscReal      *work;
1045 
1046   PetscFunctionBegin;
1047   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1049   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1050   if (type == NORM_2) {
1051     for (i=0; i<n; i++) work[i] *= work[i];
1052   }
1053   if (type == NORM_INFINITY) {
1054     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1055   } else {
1056     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1057   }
1058   ierr = PetscFree(work);CHKERRQ(ierr);
1059   if (type == NORM_2) {
1060     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1066 {
1067   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1068   PetscErrorCode ierr;
1069   PetscScalar    *a;
1070   PetscInt       m,n,i;
1071 
1072   PetscFunctionBegin;
1073   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1074   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1075   for (i=0; i<m*n; i++) {
1076     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1077   }
1078   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1083 
1084 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1085 {
1086   PetscFunctionBegin;
1087   *missing = PETSC_FALSE;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 /* -------------------------------------------------------------------*/
1092 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1093                                         MatGetRow_MPIDense,
1094                                         MatRestoreRow_MPIDense,
1095                                         MatMult_MPIDense,
1096                                 /*  4*/ MatMultAdd_MPIDense,
1097                                         MatMultTranspose_MPIDense,
1098                                         MatMultTransposeAdd_MPIDense,
1099                                         0,
1100                                         0,
1101                                         0,
1102                                 /* 10*/ 0,
1103                                         0,
1104                                         0,
1105                                         0,
1106                                         MatTranspose_MPIDense,
1107                                 /* 15*/ MatGetInfo_MPIDense,
1108                                         MatEqual_MPIDense,
1109                                         MatGetDiagonal_MPIDense,
1110                                         MatDiagonalScale_MPIDense,
1111                                         MatNorm_MPIDense,
1112                                 /* 20*/ MatAssemblyBegin_MPIDense,
1113                                         MatAssemblyEnd_MPIDense,
1114                                         MatSetOption_MPIDense,
1115                                         MatZeroEntries_MPIDense,
1116                                 /* 24*/ MatZeroRows_MPIDense,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                         0,
1121                                 /* 29*/ MatSetUp_MPIDense,
1122                                         0,
1123                                         0,
1124                                         MatGetDiagonalBlock_MPIDense,
1125                                         0,
1126                                 /* 34*/ MatDuplicate_MPIDense,
1127                                         0,
1128                                         0,
1129                                         0,
1130                                         0,
1131                                 /* 39*/ MatAXPY_MPIDense,
1132                                         MatCreateSubMatrices_MPIDense,
1133                                         0,
1134                                         MatGetValues_MPIDense,
1135                                         0,
1136                                 /* 44*/ 0,
1137                                         MatScale_MPIDense,
1138                                         MatShift_Basic,
1139                                         0,
1140                                         0,
1141                                 /* 49*/ MatSetRandom_MPIDense,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                         0,
1146                                 /* 54*/ 0,
1147                                         0,
1148                                         0,
1149                                         0,
1150                                         0,
1151                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1152                                         MatDestroy_MPIDense,
1153                                         MatView_MPIDense,
1154                                         0,
1155                                         0,
1156                                 /* 64*/ 0,
1157                                         0,
1158                                         0,
1159                                         0,
1160                                         0,
1161                                 /* 69*/ 0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                         0,
1166                                 /* 74*/ 0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                         0,
1171                                 /* 79*/ 0,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                 /* 83*/ MatLoad_MPIDense,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                         0,
1181 #if defined(PETSC_HAVE_ELEMENTAL)
1182                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1183                                         MatMatMultSymbolic_MPIDense_MPIDense,
1184 #else
1185                                 /* 89*/ 0,
1186                                         0,
1187 #endif
1188                                         MatMatMultNumeric_MPIDense,
1189                                         0,
1190                                         0,
1191                                 /* 94*/ 0,
1192                                         0,
1193                                         0,
1194                                         0,
1195                                         0,
1196                                 /* 99*/ 0,
1197                                         0,
1198                                         0,
1199                                         MatConjugate_MPIDense,
1200                                         0,
1201                                 /*104*/ 0,
1202                                         MatRealPart_MPIDense,
1203                                         MatImaginaryPart_MPIDense,
1204                                         0,
1205                                         0,
1206                                 /*109*/ 0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                         MatMissingDiagonal_MPIDense,
1211                                 /*114*/ 0,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                         0,
1216                                 /*119*/ 0,
1217                                         0,
1218                                         0,
1219                                         0,
1220                                         0,
1221                                 /*124*/ 0,
1222                                         MatGetColumnNorms_MPIDense,
1223                                         0,
1224                                         0,
1225                                         0,
1226                                 /*129*/ 0,
1227                                         MatTransposeMatMult_MPIDense_MPIDense,
1228                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1229                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1230                                         0,
1231                                 /*134*/ 0,
1232                                         0,
1233                                         0,
1234                                         0,
1235                                         0,
1236                                 /*139*/ 0,
1237                                         0,
1238                                         0,
1239                                         0,
1240                                         0,
1241                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1242 };
1243 
1244 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1245 {
1246   Mat_MPIDense   *a;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   mat->preallocated = PETSC_TRUE;
1251   /* Note:  For now, when data is specified above, this assumes the user correctly
1252    allocates the local dense storage space.  We should add error checking. */
1253 
1254   a       = (Mat_MPIDense*)mat->data;
1255   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1256   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1257   a->nvec = mat->cmap->n;
1258 
1259   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1260   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1261   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1262   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1263   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #if defined(PETSC_HAVE_ELEMENTAL)
1268 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1269 {
1270   Mat            mat_elemental;
1271   PetscErrorCode ierr;
1272   PetscScalar    *v;
1273   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1274 
1275   PetscFunctionBegin;
1276   if (reuse == MAT_REUSE_MATRIX) {
1277     mat_elemental = *newmat;
1278     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1279   } else {
1280     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1281     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1282     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1283     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1284     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1285   }
1286 
1287   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1288   for (i=0; i<N; i++) cols[i] = i;
1289   for (i=0; i<m; i++) rows[i] = rstart + i;
1290 
1291   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1292   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1293   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1294   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1295   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1296   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1297   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1298 
1299   if (reuse == MAT_INPLACE_MATRIX) {
1300     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1301   } else {
1302     *newmat = mat_elemental;
1303   }
1304   PetscFunctionReturn(0);
1305 }
1306 #endif
1307 
1308 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1309 {
1310   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1311   PetscErrorCode ierr;
1312 
1313   PetscFunctionBegin;
1314   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1319 {
1320   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1329 {
1330   PetscErrorCode ierr;
1331   Mat_MPIDense   *mat;
1332   PetscInt       m,nloc,N;
1333 
1334   PetscFunctionBegin;
1335   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1336   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1337   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1338     PetscInt sum;
1339 
1340     if (n == PETSC_DECIDE) {
1341       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1342     }
1343     /* Check sum(n) = N */
1344     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1345     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1346 
1347     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1348   }
1349 
1350   /* numeric phase */
1351   mat = (Mat_MPIDense*)(*outmat)->data;
1352   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1353   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1354   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1359 {
1360   Mat_MPIDense   *a;
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1365   mat->data = (void*)a;
1366   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1367 
1368   mat->insertmode = NOT_SET_VALUES;
1369   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1370   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1371 
1372   /* build cache for off array entries formed */
1373   a->donotstash = PETSC_FALSE;
1374 
1375   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1376 
1377   /* stuff used for matrix vector multiply */
1378   a->lvec        = 0;
1379   a->Mvctx       = 0;
1380   a->roworiented = PETSC_TRUE;
1381 
1382   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1384   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1385   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1386   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1388 #if defined(PETSC_HAVE_ELEMENTAL)
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1390 #endif
1391   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1394   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1395 
1396   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1397   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1398   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1399   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1400   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1401   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 /*MC
1406    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1407 
1408    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1409    and MATMPIDENSE otherwise.
1410 
1411    Options Database Keys:
1412 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1413 
1414   Level: beginner
1415 
1416 
1417 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1418 M*/
1419 
1420 /*@C
1421    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1422 
1423    Not collective
1424 
1425    Input Parameters:
1426 .  B - the matrix
1427 -  data - optional location of matrix data.  Set data=NULL for PETSc
1428    to control all matrix memory allocation.
1429 
1430    Notes:
1431    The dense format is fully compatible with standard Fortran 77
1432    storage by columns.
1433 
1434    The data input variable is intended primarily for Fortran programmers
1435    who wish to allocate their own matrix memory space.  Most users should
1436    set data=NULL.
1437 
1438    Level: intermediate
1439 
1440 .keywords: matrix,dense, parallel
1441 
1442 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1443 @*/
1444 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1445 {
1446   PetscErrorCode ierr;
1447 
1448   PetscFunctionBegin;
1449   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 /*@
1454    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1455    array provided by the user. This is useful to avoid copying an array
1456    into a matrix
1457 
1458    Not Collective
1459 
1460    Input Parameters:
1461 +  mat - the matrix
1462 -  array - the array in column major order
1463 
1464    Notes:
1465    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1466    freed when the matrix is destroyed.
1467 
1468    Level: developer
1469 
1470 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1471 
1472 @*/
1473 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1474 {
1475   PetscErrorCode ierr;
1476   PetscFunctionBegin;
1477   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1478   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /*@
1483    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1484 
1485    Not Collective
1486 
1487    Input Parameters:
1488 .  mat - the matrix
1489 
1490    Notes:
1491    You can only call this after a call to MatDensePlaceArray()
1492 
1493    Level: developer
1494 
1495 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1496 
1497 @*/
1498 PetscErrorCode  MatDenseResetArray(Mat mat)
1499 {
1500   PetscErrorCode ierr;
1501   PetscFunctionBegin;
1502   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1503   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 /*@C
1508    MatCreateDense - Creates a parallel matrix in dense format.
1509 
1510    Collective on MPI_Comm
1511 
1512    Input Parameters:
1513 +  comm - MPI communicator
1514 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1515 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1516 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1517 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1518 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1519    to control all matrix memory allocation.
1520 
1521    Output Parameter:
1522 .  A - the matrix
1523 
1524    Notes:
1525    The dense format is fully compatible with standard Fortran 77
1526    storage by columns.
1527 
1528    The data input variable is intended primarily for Fortran programmers
1529    who wish to allocate their own matrix memory space.  Most users should
1530    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1531 
1532    The user MUST specify either the local or global matrix dimensions
1533    (possibly both).
1534 
1535    Level: intermediate
1536 
1537 .keywords: matrix,dense, parallel
1538 
1539 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1540 @*/
1541 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1542 {
1543   PetscErrorCode ierr;
1544   PetscMPIInt    size;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1548   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1550   if (size > 1) {
1551     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1552     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1553     if (data) {  /* user provided data array, so no need to assemble */
1554       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1555       (*A)->assembled = PETSC_TRUE;
1556     }
1557   } else {
1558     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1559     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1565 {
1566   Mat            mat;
1567   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1568   PetscErrorCode ierr;
1569 
1570   PetscFunctionBegin;
1571   *newmat = 0;
1572   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1573   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1574   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1575   a       = (Mat_MPIDense*)mat->data;
1576   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1577 
1578   mat->factortype   = A->factortype;
1579   mat->assembled    = PETSC_TRUE;
1580   mat->preallocated = PETSC_TRUE;
1581 
1582   a->size         = oldmat->size;
1583   a->rank         = oldmat->rank;
1584   mat->insertmode = NOT_SET_VALUES;
1585   a->nvec         = oldmat->nvec;
1586   a->donotstash   = oldmat->donotstash;
1587 
1588   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1589   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1590 
1591   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1592   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1593   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1594 
1595   *newmat = mat;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1600 {
1601   PetscErrorCode ierr;
1602   PetscMPIInt    rank,size;
1603   const PetscInt *rowners;
1604   PetscInt       i,m,n,nz,j,mMax;
1605   PetscScalar    *array,*vals,*vals_ptr;
1606   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1607 
1608   PetscFunctionBegin;
1609   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1610   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1611 
1612   /* determine ownership of rows and columns */
1613   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1614   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1615 
1616   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1617   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1618     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1619   }
1620   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1621   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1622   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1623   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1624   if (!rank) {
1625     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1626 
1627     /* read in my part of the matrix numerical values  */
1628     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1629 
1630     /* insert into matrix-by row (this is why cannot directly read into array */
1631     vals_ptr = vals;
1632     for (i=0; i<m; i++) {
1633       for (j=0; j<N; j++) {
1634         array[i + j*m] = *vals_ptr++;
1635       }
1636     }
1637 
1638     /* read in other processors and ship out */
1639     for (i=1; i<size; i++) {
1640       nz   = (rowners[i+1] - rowners[i])*N;
1641       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1642       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1643     }
1644   } else {
1645     /* receive numeric values */
1646     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1647 
1648     /* receive message of values*/
1649     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1650 
1651     /* insert into matrix-by row (this is why cannot directly read into array */
1652     vals_ptr = vals;
1653     for (i=0; i<m; i++) {
1654       for (j=0; j<N; j++) {
1655         array[i + j*m] = *vals_ptr++;
1656       }
1657     }
1658   }
1659   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1660   ierr = PetscFree(vals);CHKERRQ(ierr);
1661   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1667 {
1668   Mat_MPIDense   *a;
1669   PetscScalar    *vals,*svals;
1670   MPI_Comm       comm;
1671   MPI_Status     status;
1672   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1673   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1674   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1675   PetscInt       i,nz,j,rstart,rend;
1676   int            fd;
1677   PetscBool      isbinary;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1682   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1683 
1684   /* force binary viewer to load .info file if it has not yet done so */
1685   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1686   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1688   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1689   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1690   if (!rank) {
1691     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1692     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1693   }
1694   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1695   M    = header[1]; N = header[2]; nz = header[3];
1696 
1697   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1698   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1699   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1700 
1701   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1702   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1703 
1704   /*
1705        Handle case where matrix is stored on disk as a dense matrix
1706   */
1707   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1708     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1709     PetscFunctionReturn(0);
1710   }
1711 
1712   /* determine ownership of all rows */
1713   if (newmat->rmap->n < 0) {
1714     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1715   } else {
1716     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1717   }
1718   if (newmat->cmap->n < 0) {
1719     n = PETSC_DECIDE;
1720   } else {
1721     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1722   }
1723 
1724   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1725   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1726   rowners[0] = 0;
1727   for (i=2; i<=size; i++) {
1728     rowners[i] += rowners[i-1];
1729   }
1730   rstart = rowners[rank];
1731   rend   = rowners[rank+1];
1732 
1733   /* distribute row lengths to all processors */
1734   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1735   if (!rank) {
1736     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1737     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1738     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1739     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1740     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1741     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1742   } else {
1743     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1744   }
1745 
1746   if (!rank) {
1747     /* calculate the number of nonzeros on each processor */
1748     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1749     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1750     for (i=0; i<size; i++) {
1751       for (j=rowners[i]; j< rowners[i+1]; j++) {
1752         procsnz[i] += rowlengths[j];
1753       }
1754     }
1755     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1756 
1757     /* determine max buffer needed and allocate it */
1758     maxnz = 0;
1759     for (i=0; i<size; i++) {
1760       maxnz = PetscMax(maxnz,procsnz[i]);
1761     }
1762     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1763 
1764     /* read in my part of the matrix column indices  */
1765     nz   = procsnz[0];
1766     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1767     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1768 
1769     /* read in every one elses and ship off */
1770     for (i=1; i<size; i++) {
1771       nz   = procsnz[i];
1772       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1773       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1774     }
1775     ierr = PetscFree(cols);CHKERRQ(ierr);
1776   } else {
1777     /* determine buffer space needed for message */
1778     nz = 0;
1779     for (i=0; i<m; i++) {
1780       nz += ourlens[i];
1781     }
1782     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1783 
1784     /* receive message of column indices*/
1785     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1786     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1787     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1788   }
1789 
1790   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1791   a = (Mat_MPIDense*)newmat->data;
1792   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1793     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1794   }
1795 
1796   if (!rank) {
1797     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1798 
1799     /* read in my part of the matrix numerical values  */
1800     nz   = procsnz[0];
1801     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1802 
1803     /* insert into matrix */
1804     jj      = rstart;
1805     smycols = mycols;
1806     svals   = vals;
1807     for (i=0; i<m; i++) {
1808       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1809       smycols += ourlens[i];
1810       svals   += ourlens[i];
1811       jj++;
1812     }
1813 
1814     /* read in other processors and ship out */
1815     for (i=1; i<size; i++) {
1816       nz   = procsnz[i];
1817       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1818       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1819     }
1820     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1821   } else {
1822     /* receive numeric values */
1823     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1824 
1825     /* receive message of values*/
1826     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1827     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1828     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1829 
1830     /* insert into matrix */
1831     jj      = rstart;
1832     smycols = mycols;
1833     svals   = vals;
1834     for (i=0; i<m; i++) {
1835       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1836       smycols += ourlens[i];
1837       svals   += ourlens[i];
1838       jj++;
1839     }
1840   }
1841   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1842   ierr = PetscFree(vals);CHKERRQ(ierr);
1843   ierr = PetscFree(mycols);CHKERRQ(ierr);
1844   ierr = PetscFree(rowners);CHKERRQ(ierr);
1845 
1846   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1847   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1852 {
1853   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1854   Mat            a,b;
1855   PetscBool      flg;
1856   PetscErrorCode ierr;
1857 
1858   PetscFunctionBegin;
1859   a    = matA->A;
1860   b    = matB->A;
1861   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1862   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1867 {
1868   PetscErrorCode        ierr;
1869   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1870   Mat_TransMatMultDense *atb = a->atbdense;
1871 
1872   PetscFunctionBegin;
1873   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1874   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1875   ierr = PetscFree(atb);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1880 {
1881   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1882   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1883   Mat_TransMatMultDense *atb = c->atbdense;
1884   PetscErrorCode ierr;
1885   MPI_Comm       comm;
1886   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1887   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1888   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1889   PetscScalar    _DOne=1.0,_DZero=0.0;
1890   PetscBLASInt   am,an,bn,aN;
1891   const PetscInt *ranges;
1892 
1893   PetscFunctionBegin;
1894   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1895   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1896   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1897 
1898   /* compute atbarray = aseq^T * bseq */
1899   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1900   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1901   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1902   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1903   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1904 
1905   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1906   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1907 
1908   /* arrange atbarray into sendbuf */
1909   k = 0;
1910   for (proc=0; proc<size; proc++) {
1911     for (j=0; j<cN; j++) {
1912       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1913     }
1914   }
1915   /* sum all atbarray to local values of C */
1916   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1917   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1918   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1923 {
1924   PetscErrorCode        ierr;
1925   Mat                   Cdense;
1926   MPI_Comm              comm;
1927   PetscMPIInt           size;
1928   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1929   Mat_MPIDense          *c;
1930   Mat_TransMatMultDense *atb;
1931 
1932   PetscFunctionBegin;
1933   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1934   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1935     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1936   }
1937 
1938   /* create matrix product Cdense */
1939   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1940   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1941   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1942   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1943   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1944   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1945   *C   = Cdense;
1946 
1947   /* create data structure for reuse Cdense */
1948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1949   ierr = PetscNew(&atb);CHKERRQ(ierr);
1950   cM = Cdense->rmap->N;
1951   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1952 
1953   c                    = (Mat_MPIDense*)Cdense->data;
1954   c->atbdense          = atb;
1955   atb->destroy         = Cdense->ops->destroy;
1956   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1961 {
1962   PetscErrorCode ierr;
1963 
1964   PetscFunctionBegin;
1965   if (scall == MAT_INITIAL_MATRIX) {
1966     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1967   }
1968   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1973 {
1974   PetscErrorCode   ierr;
1975   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1976   Mat_MatMultDense *ab = a->abdense;
1977 
1978   PetscFunctionBegin;
1979   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1980   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1981   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1982 
1983   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1984   ierr = PetscFree(ab);CHKERRQ(ierr);
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 #if defined(PETSC_HAVE_ELEMENTAL)
1989 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1990 {
1991   PetscErrorCode   ierr;
1992   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1993   Mat_MatMultDense *ab=c->abdense;
1994 
1995   PetscFunctionBegin;
1996   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1997   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1998   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1999   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2004 {
2005   PetscErrorCode   ierr;
2006   Mat              Ae,Be,Ce;
2007   Mat_MPIDense     *c;
2008   Mat_MatMultDense *ab;
2009 
2010   PetscFunctionBegin;
2011   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2012     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2013   }
2014 
2015   /* convert A and B to Elemental matrices Ae and Be */
2016   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2017   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2018 
2019   /* Ce = Ae*Be */
2020   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2021   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2022 
2023   /* convert Ce to C */
2024   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2025 
2026   /* create data structure for reuse Cdense */
2027   ierr = PetscNew(&ab);CHKERRQ(ierr);
2028   c                  = (Mat_MPIDense*)(*C)->data;
2029   c->abdense         = ab;
2030 
2031   ab->Ae             = Ae;
2032   ab->Be             = Be;
2033   ab->Ce             = Ce;
2034   ab->destroy        = (*C)->ops->destroy;
2035   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2036   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2041 {
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
2046     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2047     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2048     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2049   } else {
2050     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2051     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2052     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2053   }
2054   PetscFunctionReturn(0);
2055 }
2056 #endif
2057 
2058