xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 65f8aed5f7eaa1e2ef2ddeffe666264e0669c876)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62 {
63   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
66   PetscScalar    *array;
67   MPI_Comm       comm;
68   Mat            B;
69 
70   PetscFunctionBegin;
71   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72 
73   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
74   if (!B) {
75     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
76     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
77     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
78     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
79     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
80     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
81     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
82     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
84     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
85     *a   = B;
86     ierr = MatDestroy(&B);CHKERRQ(ierr);
87   } else *a = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92 {
93   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96   PetscBool      roworiented = A->roworiented;
97 
98   PetscFunctionBegin;
99   for (i=0; i<m; i++) {
100     if (idxm[i] < 0) continue;
101     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102     if (idxm[i] >= rstart && idxm[i] < rend) {
103       row = idxm[i] - rstart;
104       if (roworiented) {
105         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
106       } else {
107         for (j=0; j<n; j++) {
108           if (idxn[j] < 0) continue;
109           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
111         }
112       }
113     } else if (!A->donotstash) {
114       mat->assembled = PETSC_FALSE;
115       if (roworiented) {
116         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117       } else {
118         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
119       }
120     }
121   }
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188 {
189   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
190   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
191   PetscErrorCode ierr;
192   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
193   const PetscInt *irow,*icol;
194   PetscScalar    *av,*bv,*v = lmat->v;
195   Mat            newmat;
196   IS             iscol_local;
197   MPI_Comm       comm_is,comm_mat;
198 
199   PetscFunctionBegin;
200   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
201   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
202   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
203 
204   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
205   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
206   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
207   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
208   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
209   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
210 
211   /* No parallel redistribution currently supported! Should really check each index set
212      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
213      original matrix! */
214 
215   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
216   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
217 
218   /* Check submatrix call */
219   if (scall == MAT_REUSE_MATRIX) {
220     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
221     /* Really need to test rows and column sizes! */
222     newmat = *B;
223   } else {
224     /* Create and fill new matrix */
225     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
226     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
227     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
228     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
229   }
230 
231   /* Now extract the data pointers and do the copy, column at a time */
232   newmatd = (Mat_MPIDense*)newmat->data;
233   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
234 
235   for (i=0; i<Ncols; i++) {
236     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
237     for (j=0; j<nrows; j++) {
238       *bv++ = av[irow[j] - rstart];
239     }
240   }
241 
242   /* Assemble the matrices so that the correct flags are set */
243   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
244   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
245 
246   /* Free work space */
247   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
248   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
249   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
250   *B   = newmat;
251   PetscFunctionReturn(0);
252 }
253 
254 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
255 {
256   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
257   PetscErrorCode ierr;
258 
259   PetscFunctionBegin;
260   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
275 {
276   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
277   MPI_Comm       comm;
278   PetscErrorCode ierr;
279   PetscInt       nstash,reallocs;
280   InsertMode     addv;
281 
282   PetscFunctionBegin;
283   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
284   /* make sure all processors are either in INSERTMODE or ADDMODE */
285   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
286   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
287   mat->insertmode = addv; /* in case this processor had no cache */
288 
289   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
290   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
291   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
292   PetscFunctionReturn(0);
293 }
294 
295 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
296 {
297   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
298   PetscErrorCode ierr;
299   PetscInt       i,*row,*col,flg,j,rstart,ncols;
300   PetscMPIInt    n;
301   PetscScalar    *val;
302 
303   PetscFunctionBegin;
304   /*  wait on receives */
305   while (1) {
306     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
307     if (!flg) break;
308 
309     for (i=0; i<n;) {
310       /* Now identify the consecutive vals belonging to the same row */
311       for (j=i,rstart=row[j]; j<n; j++) {
312         if (row[j] != rstart) break;
313       }
314       if (j < n) ncols = j-i;
315       else       ncols = n-i;
316       /* Now assemble all these values with a single function call */
317       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
318       i    = j;
319     }
320   }
321   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
322 
323   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
324   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
325 
326   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
327     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
328   }
329   PetscFunctionReturn(0);
330 }
331 
332 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333 {
334   PetscErrorCode ierr;
335   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
336 
337   PetscFunctionBegin;
338   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
342 /* the code does not do the diagonal entries correctly unless the
343    matrix is square and the column and row owerships are identical.
344    This is a BUG. The only way to fix it seems to be to access
345    mdn->A and mdn->B directly and not through the MatZeroRows()
346    routine.
347 */
348 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
349 {
350   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
351   PetscErrorCode    ierr;
352   PetscInt          i,*owners = A->rmap->range;
353   PetscInt          *sizes,j,idx,nsends;
354   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
355   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
356   PetscInt          *lens,*lrows,*values;
357   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
358   MPI_Comm          comm;
359   MPI_Request       *send_waits,*recv_waits;
360   MPI_Status        recv_status,*send_status;
361   PetscBool         found;
362   const PetscScalar *xx;
363   PetscScalar       *bb;
364 
365   PetscFunctionBegin;
366   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
367   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
368   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
369   /*  first count number of contributors to each processor */
370   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
371   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
372   for (i=0; i<N; i++) {
373     idx   = rows[i];
374     found = PETSC_FALSE;
375     for (j=0; j<size; j++) {
376       if (idx >= owners[j] && idx < owners[j+1]) {
377         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
378       }
379     }
380     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
381   }
382   nsends = 0;
383   for (i=0; i<size; i++) nsends += sizes[2*i+1];
384 
385   /* inform other processors of number of messages and max length*/
386   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
387 
388   /* post receives:   */
389   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
390   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
391   for (i=0; i<nrecvs; i++) {
392     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
393   }
394 
395   /* do sends:
396       1) starts[i] gives the starting index in svalues for stuff going to
397          the ith processor
398   */
399   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
400   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
401   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
402 
403   starts[0] = 0;
404   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
405   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
406 
407   starts[0] = 0;
408   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
409   count = 0;
410   for (i=0; i<size; i++) {
411     if (sizes[2*i+1]) {
412       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
413     }
414   }
415   ierr = PetscFree(starts);CHKERRQ(ierr);
416 
417   base = owners[rank];
418 
419   /*  wait on receives */
420   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
421   count = nrecvs;
422   slen  = 0;
423   while (count) {
424     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
425     /* unpack receives into our local space */
426     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
427 
428     source[imdex] = recv_status.MPI_SOURCE;
429     lens[imdex]   = n;
430     slen += n;
431     count--;
432   }
433   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
434 
435   /* move the data into the send scatter */
436   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
437   count = 0;
438   for (i=0; i<nrecvs; i++) {
439     values = rvalues + i*nmax;
440     for (j=0; j<lens[i]; j++) {
441       lrows[count++] = values[j] - base;
442     }
443   }
444   ierr = PetscFree(rvalues);CHKERRQ(ierr);
445   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
446   ierr = PetscFree(owner);CHKERRQ(ierr);
447   ierr = PetscFree(sizes);CHKERRQ(ierr);
448 
449   /* fix right hand side if needed */
450   if (x && b) {
451     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
452     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
453     for (i=0; i<slen; i++) {
454       bb[lrows[i]] = diag*xx[lrows[i]];
455     }
456     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
457     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
458   }
459 
460   /* actually zap the local rows */
461   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
462   if (diag != 0.0) {
463     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
464     PetscInt     m   = ll->lda, i;
465 
466     for (i=0; i<slen; i++) {
467       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
468     }
469   }
470   ierr = PetscFree(lrows);CHKERRQ(ierr);
471 
472   /* wait on sends */
473   if (nsends) {
474     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
475     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
476     ierr = PetscFree(send_status);CHKERRQ(ierr);
477   }
478   ierr = PetscFree(send_waits);CHKERRQ(ierr);
479   ierr = PetscFree(svalues);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
484 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
485 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
486 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
487 
488 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
489 {
490   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
491   PetscErrorCode ierr;
492 
493   PetscFunctionBegin;
494   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
495   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
501 {
502   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
503   PetscErrorCode ierr;
504 
505   PetscFunctionBegin;
506   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
507   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
508   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
513 {
514   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
515   PetscErrorCode ierr;
516   PetscScalar    zero = 0.0;
517 
518   PetscFunctionBegin;
519   ierr = VecSet(yy,zero);CHKERRQ(ierr);
520   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
521   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
522   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
527 {
528   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
529   PetscErrorCode ierr;
530 
531   PetscFunctionBegin;
532   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
533   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
534   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
535   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
542   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
543   PetscErrorCode ierr;
544   PetscInt       len,i,n,m = A->rmap->n,radd;
545   PetscScalar    *x,zero = 0.0;
546 
547   PetscFunctionBegin;
548   ierr = VecSet(v,zero);CHKERRQ(ierr);
549   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
550   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
551   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
553   radd = A->rmap->rstart*m;
554   for (i=0; i<len; i++) {
555     x[i] = aloc->v[radd + i*m + i];
556   }
557   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 PetscErrorCode MatDestroy_MPIDense(Mat mat)
562 {
563   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
564   PetscErrorCode ierr;
565 
566   PetscFunctionBegin;
567 #if defined(PETSC_USE_LOG)
568   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
569 #endif
570   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
571   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
572   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
573   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
574 
575   ierr = PetscFree(mat->data);CHKERRQ(ierr);
576   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
577 
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
580   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
581   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
582   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
584 #if defined(PETSC_HAVE_ELEMENTAL)
585   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
586 #endif
587   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
592   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
593   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
594   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
595   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
600 {
601   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
602   PetscErrorCode    ierr;
603   PetscViewerFormat format;
604   int               fd;
605   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
606   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
607   PetscScalar       *work,*v,*vv;
608   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
609 
610   PetscFunctionBegin;
611   if (mdn->size == 1) {
612     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
613   } else {
614     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
615     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
616     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
617 
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_NATIVE) {
620 
621       if (!rank) {
622         /* store the matrix as a dense matrix */
623         header[0] = MAT_FILE_CLASSID;
624         header[1] = mat->rmap->N;
625         header[2] = N;
626         header[3] = MATRIX_BINARY_FORMAT_DENSE;
627         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
628 
629         /* get largest work array needed for transposing array */
630         mmax = mat->rmap->n;
631         for (i=1; i<size; i++) {
632           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
633         }
634         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
635 
636         /* write out local array, by rows */
637         m = mat->rmap->n;
638         v = a->v;
639         for (j=0; j<N; j++) {
640           for (i=0; i<m; i++) {
641             work[j + i*N] = *v++;
642           }
643         }
644         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
645         /* get largest work array to receive messages from other processes, excludes process zero */
646         mmax = 0;
647         for (i=1; i<size; i++) {
648           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
649         }
650         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
651         for (k = 1; k < size; k++) {
652           v    = vv;
653           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
654           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
655 
656           for (j = 0; j < N; j++) {
657             for (i = 0; i < m; i++) {
658               work[j + i*N] = *v++;
659             }
660           }
661           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
662         }
663         ierr = PetscFree(work);CHKERRQ(ierr);
664         ierr = PetscFree(vv);CHKERRQ(ierr);
665       } else {
666         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
667       }
668     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
669   }
670   PetscFunctionReturn(0);
671 }
672 
673 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
674 #include <petscdraw.h>
675 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
676 {
677   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
678   PetscErrorCode    ierr;
679   PetscMPIInt       rank = mdn->rank;
680   PetscViewerType   vtype;
681   PetscBool         iascii,isdraw;
682   PetscViewer       sviewer;
683   PetscViewerFormat format;
684 
685   PetscFunctionBegin;
686   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
687   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
688   if (iascii) {
689     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
690     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
691     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
692       MatInfo info;
693       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
694       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
695       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
697       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
698       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_INFO) {
701       PetscFunctionReturn(0);
702     }
703   } else if (isdraw) {
704     PetscDraw draw;
705     PetscBool isnull;
706 
707     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
708     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
709     if (isnull) PetscFunctionReturn(0);
710   }
711 
712   {
713     /* assemble the entire matrix onto first processor. */
714     Mat         A;
715     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
716     PetscInt    *cols;
717     PetscScalar *vals;
718 
719     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
722     } else {
723       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
724     }
725     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
726     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
727     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
728     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
729 
730     /* Copy the matrix ... This isn't the most efficient means,
731        but it's quick for now */
732     A->insertmode = INSERT_VALUES;
733 
734     row = mat->rmap->rstart;
735     m   = mdn->A->rmap->n;
736     for (i=0; i<m; i++) {
737       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
738       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
739       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
740       row++;
741     }
742 
743     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
744     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
746     if (!rank) {
747       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
748       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
749     }
750     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
751     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
752     ierr = MatDestroy(&A);CHKERRQ(ierr);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
758 {
759   PetscErrorCode ierr;
760   PetscBool      iascii,isbinary,isdraw,issocket;
761 
762   PetscFunctionBegin;
763   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
764   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
765   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
766   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
767 
768   if (iascii || issocket || isdraw) {
769     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
770   } else if (isbinary) {
771     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
777 {
778   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
779   Mat            mdn  = mat->A;
780   PetscErrorCode ierr;
781   PetscReal      isend[5],irecv[5];
782 
783   PetscFunctionBegin;
784   info->block_size = 1.0;
785 
786   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
787 
788   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
789   isend[3] = info->memory;  isend[4] = info->mallocs;
790   if (flag == MAT_LOCAL) {
791     info->nz_used      = isend[0];
792     info->nz_allocated = isend[1];
793     info->nz_unneeded  = isend[2];
794     info->memory       = isend[3];
795     info->mallocs      = isend[4];
796   } else if (flag == MAT_GLOBAL_MAX) {
797     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
798 
799     info->nz_used      = irecv[0];
800     info->nz_allocated = irecv[1];
801     info->nz_unneeded  = irecv[2];
802     info->memory       = irecv[3];
803     info->mallocs      = irecv[4];
804   } else if (flag == MAT_GLOBAL_SUM) {
805     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
806 
807     info->nz_used      = irecv[0];
808     info->nz_allocated = irecv[1];
809     info->nz_unneeded  = irecv[2];
810     info->memory       = irecv[3];
811     info->mallocs      = irecv[4];
812   }
813   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
814   info->fill_ratio_needed = 0;
815   info->factor_mallocs    = 0;
816   PetscFunctionReturn(0);
817 }
818 
819 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
820 {
821   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   switch (op) {
826   case MAT_NEW_NONZERO_LOCATIONS:
827   case MAT_NEW_NONZERO_LOCATION_ERR:
828   case MAT_NEW_NONZERO_ALLOCATION_ERR:
829     MatCheckPreallocated(A,1);
830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
831     break;
832   case MAT_ROW_ORIENTED:
833     MatCheckPreallocated(A,1);
834     a->roworiented = flg;
835     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
836     break;
837   case MAT_NEW_DIAGONALS:
838   case MAT_KEEP_NONZERO_PATTERN:
839   case MAT_USE_HASH_TABLE:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   case MAT_IGNORE_OFF_PROC_ENTRIES:
843     a->donotstash = flg;
844     break;
845   case MAT_SYMMETRIC:
846   case MAT_STRUCTURALLY_SYMMETRIC:
847   case MAT_HERMITIAN:
848   case MAT_SYMMETRY_ETERNAL:
849   case MAT_IGNORE_LOWER_TRIANGULAR:
850     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
851     break;
852   default:
853     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 
859 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
860 {
861   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
862   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
863   const PetscScalar *l,*r;
864   PetscScalar       x,*v;
865   PetscErrorCode    ierr;
866   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
867 
868   PetscFunctionBegin;
869   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
870   if (ll) {
871     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
872     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
873     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
874     for (i=0; i<m; i++) {
875       x = l[i];
876       v = mat->v + i;
877       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
878     }
879     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
880     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
881   }
882   if (rr) {
883     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
884     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
885     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
888     for (i=0; i<n; i++) {
889       x = r[i];
890       v = mat->v + i*m;
891       for (j=0; j<m; j++) (*v++) *= x;
892     }
893     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
894     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
895   }
896   PetscFunctionReturn(0);
897 }
898 
899 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
900 {
901   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
902   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
903   PetscErrorCode ierr;
904   PetscInt       i,j;
905   PetscReal      sum = 0.0;
906   PetscScalar    *v  = mat->v;
907 
908   PetscFunctionBegin;
909   if (mdn->size == 1) {
910     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
911   } else {
912     if (type == NORM_FROBENIUS) {
913       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
915       }
916       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
917       *nrm = PetscSqrtReal(*nrm);
918       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
919     } else if (type == NORM_1) {
920       PetscReal *tmp,*tmp2;
921       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
922       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
923       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
924       *nrm = 0.0;
925       v    = mat->v;
926       for (j=0; j<mdn->A->cmap->n; j++) {
927         for (i=0; i<mdn->A->rmap->n; i++) {
928           tmp[j] += PetscAbsScalar(*v);  v++;
929         }
930       }
931       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
932       for (j=0; j<A->cmap->N; j++) {
933         if (tmp2[j] > *nrm) *nrm = tmp2[j];
934       }
935       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
936       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
937     } else if (type == NORM_INFINITY) { /* max row norm */
938       PetscReal ntemp;
939       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
940       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
947 {
948   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
949   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
950   Mat            B;
951   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
952   PetscErrorCode ierr;
953   PetscInt       j,i;
954   PetscScalar    *v;
955 
956   PetscFunctionBegin;
957   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
958     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
959     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
960     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
961     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
962   } else {
963     B = *matout;
964   }
965 
966   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
967   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
968   for (i=0; i<m; i++) rwork[i] = rstart + i;
969   for (j=0; j<n; j++) {
970     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
971     v   += m;
972   }
973   ierr = PetscFree(rwork);CHKERRQ(ierr);
974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
977     *matout = B;
978   } else {
979     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 
985 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
986 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
987 
988 PetscErrorCode MatSetUp_MPIDense(Mat A)
989 {
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998 {
999   PetscErrorCode ierr;
1000   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1001 
1002   PetscFunctionBegin;
1003   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1004   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1009 {
1010   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 PetscErrorCode MatRealPart_MPIDense(Mat A)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1029 {
1030   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1039 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1040 {
1041   PetscErrorCode ierr;
1042   PetscInt       i,n;
1043   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1044   PetscReal      *work;
1045 
1046   PetscFunctionBegin;
1047   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1049   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1050   if (type == NORM_2) {
1051     for (i=0; i<n; i++) work[i] *= work[i];
1052   }
1053   if (type == NORM_INFINITY) {
1054     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1055   } else {
1056     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1057   }
1058   ierr = PetscFree(work);CHKERRQ(ierr);
1059   if (type == NORM_2) {
1060     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1066 {
1067   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1068   PetscErrorCode ierr;
1069   PetscScalar    *a;
1070   PetscInt       m,n,i;
1071 
1072   PetscFunctionBegin;
1073   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1074   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1075   for (i=0; i<m*n; i++) {
1076     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1077   }
1078   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1083 
1084 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1085 {
1086   PetscFunctionBegin;
1087   *missing = PETSC_FALSE;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 /* -------------------------------------------------------------------*/
1092 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1093                                         MatGetRow_MPIDense,
1094                                         MatRestoreRow_MPIDense,
1095                                         MatMult_MPIDense,
1096                                 /*  4*/ MatMultAdd_MPIDense,
1097                                         MatMultTranspose_MPIDense,
1098                                         MatMultTransposeAdd_MPIDense,
1099                                         0,
1100                                         0,
1101                                         0,
1102                                 /* 10*/ 0,
1103                                         0,
1104                                         0,
1105                                         0,
1106                                         MatTranspose_MPIDense,
1107                                 /* 15*/ MatGetInfo_MPIDense,
1108                                         MatEqual_MPIDense,
1109                                         MatGetDiagonal_MPIDense,
1110                                         MatDiagonalScale_MPIDense,
1111                                         MatNorm_MPIDense,
1112                                 /* 20*/ MatAssemblyBegin_MPIDense,
1113                                         MatAssemblyEnd_MPIDense,
1114                                         MatSetOption_MPIDense,
1115                                         MatZeroEntries_MPIDense,
1116                                 /* 24*/ MatZeroRows_MPIDense,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                         0,
1121                                 /* 29*/ MatSetUp_MPIDense,
1122                                         0,
1123                                         0,
1124                                         MatGetDiagonalBlock_MPIDense,
1125                                         0,
1126                                 /* 34*/ MatDuplicate_MPIDense,
1127                                         0,
1128                                         0,
1129                                         0,
1130                                         0,
1131                                 /* 39*/ MatAXPY_MPIDense,
1132                                         MatCreateSubMatrices_MPIDense,
1133                                         0,
1134                                         MatGetValues_MPIDense,
1135                                         0,
1136                                 /* 44*/ 0,
1137                                         MatScale_MPIDense,
1138                                         MatShift_Basic,
1139                                         0,
1140                                         0,
1141                                 /* 49*/ MatSetRandom_MPIDense,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                         0,
1146                                 /* 54*/ 0,
1147                                         0,
1148                                         0,
1149                                         0,
1150                                         0,
1151                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1152                                         MatDestroy_MPIDense,
1153                                         MatView_MPIDense,
1154                                         0,
1155                                         0,
1156                                 /* 64*/ 0,
1157                                         0,
1158                                         0,
1159                                         0,
1160                                         0,
1161                                 /* 69*/ 0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                         0,
1166                                 /* 74*/ 0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                         0,
1171                                 /* 79*/ 0,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                 /* 83*/ MatLoad_MPIDense,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                         0,
1181 #if defined(PETSC_HAVE_ELEMENTAL)
1182                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1183                                         MatMatMultSymbolic_MPIDense_MPIDense,
1184 #else
1185                                 /* 89*/ 0,
1186                                         0,
1187 #endif
1188                                         MatMatMultNumeric_MPIDense,
1189                                         0,
1190                                         0,
1191                                 /* 94*/ 0,
1192                                         0,
1193                                         0,
1194                                         0,
1195                                         0,
1196                                 /* 99*/ 0,
1197                                         0,
1198                                         0,
1199                                         MatConjugate_MPIDense,
1200                                         0,
1201                                 /*104*/ 0,
1202                                         MatRealPart_MPIDense,
1203                                         MatImaginaryPart_MPIDense,
1204                                         0,
1205                                         0,
1206                                 /*109*/ 0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                         MatMissingDiagonal_MPIDense,
1211                                 /*114*/ 0,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                         0,
1216                                 /*119*/ 0,
1217                                         0,
1218                                         0,
1219                                         0,
1220                                         0,
1221                                 /*124*/ 0,
1222                                         MatGetColumnNorms_MPIDense,
1223                                         0,
1224                                         0,
1225                                         0,
1226                                 /*129*/ 0,
1227                                         MatTransposeMatMult_MPIDense_MPIDense,
1228                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1229                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1230                                         0,
1231                                 /*134*/ 0,
1232                                         0,
1233                                         0,
1234                                         0,
1235                                         0,
1236                                 /*139*/ 0,
1237                                         0,
1238                                         0,
1239                                         0,
1240                                         0,
1241                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1242 };
1243 
1244 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1245 {
1246   Mat_MPIDense   *a;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   mat->preallocated = PETSC_TRUE;
1251   /* Note:  For now, when data is specified above, this assumes the user correctly
1252    allocates the local dense storage space.  We should add error checking. */
1253 
1254   a       = (Mat_MPIDense*)mat->data;
1255   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1256   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1257   a->nvec = mat->cmap->n;
1258 
1259   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1260   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1261   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1262   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1263   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #if defined(PETSC_HAVE_ELEMENTAL)
1268 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1269 {
1270   Mat            mat_elemental;
1271   PetscErrorCode ierr;
1272   PetscScalar    *v;
1273   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1274 
1275   PetscFunctionBegin;
1276   if (reuse == MAT_REUSE_MATRIX) {
1277     mat_elemental = *newmat;
1278     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1279   } else {
1280     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1281     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1282     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1283     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1284     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1285   }
1286 
1287   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1288   for (i=0; i<N; i++) cols[i] = i;
1289   for (i=0; i<m; i++) rows[i] = rstart + i;
1290 
1291   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1292   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1293   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1294   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1295   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1296   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1297   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1298 
1299   if (reuse == MAT_INPLACE_MATRIX) {
1300     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1301   } else {
1302     *newmat = mat_elemental;
1303   }
1304   PetscFunctionReturn(0);
1305 }
1306 #endif
1307 
1308 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1309 {
1310   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1311   PetscErrorCode ierr;
1312 
1313   PetscFunctionBegin;
1314   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1319 {
1320   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
1327 
1328 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1329 {
1330   PetscErrorCode ierr;
1331   Mat_MPIDense   *mat;
1332   PetscInt       m,nloc,N;
1333 
1334   PetscFunctionBegin;
1335   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1336   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1337   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1338     PetscInt sum;
1339 
1340     if (n == PETSC_DECIDE) {
1341       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1342     }
1343     /* Check sum(n) = N */
1344     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1345     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1346 
1347     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1348   }
1349 
1350   /* numeric phase */
1351   mat = (Mat_MPIDense*)(*outmat)->data;
1352   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1353   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1354   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1359 {
1360   Mat_MPIDense   *a;
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1365   mat->data = (void*)a;
1366   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1367 
1368   mat->insertmode = NOT_SET_VALUES;
1369   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1370   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1371 
1372   /* build cache for off array entries formed */
1373   a->donotstash = PETSC_FALSE;
1374 
1375   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1376 
1377   /* stuff used for matrix vector multiply */
1378   a->lvec        = 0;
1379   a->Mvctx       = 0;
1380   a->roworiented = PETSC_TRUE;
1381 
1382   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1384   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1385   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1386   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1388 #if defined(PETSC_HAVE_ELEMENTAL)
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1390 #endif
1391   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1394   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1395 
1396   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1397   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1398   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1399   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1400   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1401   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 /*MC
1406    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1407 
1408    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1409    and MATMPIDENSE otherwise.
1410 
1411    Options Database Keys:
1412 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1413 
1414   Level: beginner
1415 
1416 
1417 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1418 M*/
1419 
1420 /*@C
1421    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1422 
1423    Not collective
1424 
1425    Input Parameters:
1426 .  B - the matrix
1427 -  data - optional location of matrix data.  Set data=NULL for PETSc
1428    to control all matrix memory allocation.
1429 
1430    Notes:
1431    The dense format is fully compatible with standard Fortran 77
1432    storage by columns.
1433 
1434    The data input variable is intended primarily for Fortran programmers
1435    who wish to allocate their own matrix memory space.  Most users should
1436    set data=NULL.
1437 
1438    Level: intermediate
1439 
1440 .keywords: matrix,dense, parallel
1441 
1442 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1443 @*/
1444 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1445 {
1446   PetscErrorCode ierr;
1447 
1448   PetscFunctionBegin;
1449   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 /*@
1454    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1455    array provided by the user. This is useful to avoid copying an array
1456    into a matrix
1457 
1458    Not Collective
1459 
1460    Input Parameters:
1461 +  mat - the matrix
1462 -  array - the array in column major order
1463 
1464    Notes:
1465    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1466    freed when the matrix is destroyed.
1467 
1468    Level: developer
1469 
1470 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1471 
1472 @*/
1473 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1474 {
1475   PetscErrorCode ierr;
1476   PetscFunctionBegin;
1477   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1478   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /*@
1483    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1484 
1485    Not Collective
1486 
1487    Input Parameters:
1488 .  mat - the matrix
1489 
1490    Notes:
1491    You can only call this after a call to MatDensePlaceArray()
1492 
1493    Level: developer
1494 
1495 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1496 
1497 @*/
1498 PetscErrorCode  MatDenseResetArray(Mat mat)
1499 {
1500   PetscErrorCode ierr;
1501   PetscFunctionBegin;
1502   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1503   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 /*@C
1508    MatCreateDense - Creates a parallel matrix in dense format.
1509 
1510    Collective on MPI_Comm
1511 
1512    Input Parameters:
1513 +  comm - MPI communicator
1514 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1515 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1516 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1517 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1518 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1519    to control all matrix memory allocation.
1520 
1521    Output Parameter:
1522 .  A - the matrix
1523 
1524    Notes:
1525    The dense format is fully compatible with standard Fortran 77
1526    storage by columns.
1527 
1528    The data input variable is intended primarily for Fortran programmers
1529    who wish to allocate their own matrix memory space.  Most users should
1530    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1531 
1532    The user MUST specify either the local or global matrix dimensions
1533    (possibly both).
1534 
1535    Level: intermediate
1536 
1537 .keywords: matrix,dense, parallel
1538 
1539 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1540 @*/
1541 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1542 {
1543   PetscErrorCode ierr;
1544   PetscMPIInt    size;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1548   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1550   if (size > 1) {
1551     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1552     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1553     if (data) {  /* user provided data array, so no need to assemble */
1554       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1555       (*A)->assembled = PETSC_TRUE;
1556     }
1557   } else {
1558     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1559     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1565 {
1566   Mat            mat;
1567   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1568   PetscErrorCode ierr;
1569 
1570   PetscFunctionBegin;
1571   *newmat = 0;
1572   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1573   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1574   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1575   a       = (Mat_MPIDense*)mat->data;
1576   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1577 
1578   mat->factortype   = A->factortype;
1579   mat->assembled    = PETSC_TRUE;
1580   mat->preallocated = PETSC_TRUE;
1581 
1582   a->size         = oldmat->size;
1583   a->rank         = oldmat->rank;
1584   mat->insertmode = NOT_SET_VALUES;
1585   a->nvec         = oldmat->nvec;
1586   a->donotstash   = oldmat->donotstash;
1587 
1588   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1589   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1590 
1591   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1592   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1593   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1594 
1595   *newmat = mat;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1600 {
1601   PetscErrorCode ierr;
1602   PetscMPIInt    rank,size;
1603   const PetscInt *rowners;
1604   PetscInt       i,m,n,nz,j,mMax;
1605   PetscScalar    *array,*vals,*vals_ptr;
1606   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1607 
1608   PetscFunctionBegin;
1609   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1610   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1611 
1612   /* determine ownership of rows and columns */
1613   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1614   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1615 
1616   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1617   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1618     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1619   }
1620   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1621   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1622   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1623   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1624   if (!rank) {
1625     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1626 
1627     /* read in my part of the matrix numerical values  */
1628     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1629 
1630     /* insert into matrix-by row (this is why cannot directly read into array */
1631     vals_ptr = vals;
1632     for (i=0; i<m; i++) {
1633       for (j=0; j<N; j++) {
1634         array[i + j*m] = *vals_ptr++;
1635       }
1636     }
1637 
1638     /* read in other processors and ship out */
1639     for (i=1; i<size; i++) {
1640       nz   = (rowners[i+1] - rowners[i])*N;
1641       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1642       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1643     }
1644   } else {
1645     /* receive numeric values */
1646     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1647 
1648     /* receive message of values*/
1649     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1650 
1651     /* insert into matrix-by row (this is why cannot directly read into array */
1652     vals_ptr = vals;
1653     for (i=0; i<m; i++) {
1654       for (j=0; j<N; j++) {
1655         array[i + j*m] = *vals_ptr++;
1656       }
1657     }
1658   }
1659   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1660   ierr = PetscFree(vals);CHKERRQ(ierr);
1661   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1667 {
1668   Mat_MPIDense   *a;
1669   PetscScalar    *vals,*svals;
1670   MPI_Comm       comm;
1671   MPI_Status     status;
1672   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1673   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1674   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1675   PetscInt       i,nz,j,rstart,rend;
1676   int            fd;
1677   PetscErrorCode ierr;
1678 
1679   PetscFunctionBegin;
1680   /* force binary viewer to load .info file if it has not yet done so */
1681   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1682   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1683   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1684   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1685   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1686   if (!rank) {
1687     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1688     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1689   }
1690   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1691   M    = header[1]; N = header[2]; nz = header[3];
1692 
1693   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1694   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1695   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1696 
1697   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1698   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1699 
1700   /*
1701        Handle case where matrix is stored on disk as a dense matrix
1702   */
1703   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1704     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1705     PetscFunctionReturn(0);
1706   }
1707 
1708   /* determine ownership of all rows */
1709   if (newmat->rmap->n < 0) {
1710     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1711   } else {
1712     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1713   }
1714   if (newmat->cmap->n < 0) {
1715     n = PETSC_DECIDE;
1716   } else {
1717     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1718   }
1719 
1720   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1721   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1722   rowners[0] = 0;
1723   for (i=2; i<=size; i++) {
1724     rowners[i] += rowners[i-1];
1725   }
1726   rstart = rowners[rank];
1727   rend   = rowners[rank+1];
1728 
1729   /* distribute row lengths to all processors */
1730   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1731   if (!rank) {
1732     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1733     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1734     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1735     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1736     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1737     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1738   } else {
1739     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1740   }
1741 
1742   if (!rank) {
1743     /* calculate the number of nonzeros on each processor */
1744     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1745     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1746     for (i=0; i<size; i++) {
1747       for (j=rowners[i]; j< rowners[i+1]; j++) {
1748         procsnz[i] += rowlengths[j];
1749       }
1750     }
1751     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1752 
1753     /* determine max buffer needed and allocate it */
1754     maxnz = 0;
1755     for (i=0; i<size; i++) {
1756       maxnz = PetscMax(maxnz,procsnz[i]);
1757     }
1758     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1759 
1760     /* read in my part of the matrix column indices  */
1761     nz   = procsnz[0];
1762     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1763     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1764 
1765     /* read in every one elses and ship off */
1766     for (i=1; i<size; i++) {
1767       nz   = procsnz[i];
1768       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1769       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1770     }
1771     ierr = PetscFree(cols);CHKERRQ(ierr);
1772   } else {
1773     /* determine buffer space needed for message */
1774     nz = 0;
1775     for (i=0; i<m; i++) {
1776       nz += ourlens[i];
1777     }
1778     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1779 
1780     /* receive message of column indices*/
1781     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1782     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1783     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1784   }
1785 
1786   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1787   a = (Mat_MPIDense*)newmat->data;
1788   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1789     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1790   }
1791 
1792   if (!rank) {
1793     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1794 
1795     /* read in my part of the matrix numerical values  */
1796     nz   = procsnz[0];
1797     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1798 
1799     /* insert into matrix */
1800     jj      = rstart;
1801     smycols = mycols;
1802     svals   = vals;
1803     for (i=0; i<m; i++) {
1804       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1805       smycols += ourlens[i];
1806       svals   += ourlens[i];
1807       jj++;
1808     }
1809 
1810     /* read in other processors and ship out */
1811     for (i=1; i<size; i++) {
1812       nz   = procsnz[i];
1813       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1814       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1815     }
1816     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1817   } else {
1818     /* receive numeric values */
1819     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1820 
1821     /* receive message of values*/
1822     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1823     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1824     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1825 
1826     /* insert into matrix */
1827     jj      = rstart;
1828     smycols = mycols;
1829     svals   = vals;
1830     for (i=0; i<m; i++) {
1831       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1832       smycols += ourlens[i];
1833       svals   += ourlens[i];
1834       jj++;
1835     }
1836   }
1837   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1838   ierr = PetscFree(vals);CHKERRQ(ierr);
1839   ierr = PetscFree(mycols);CHKERRQ(ierr);
1840   ierr = PetscFree(rowners);CHKERRQ(ierr);
1841 
1842   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1843   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1848 {
1849   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1850   Mat            a,b;
1851   PetscBool      flg;
1852   PetscErrorCode ierr;
1853 
1854   PetscFunctionBegin;
1855   a    = matA->A;
1856   b    = matB->A;
1857   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1858   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1863 {
1864   PetscErrorCode        ierr;
1865   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1866   Mat_TransMatMultDense *atb = a->atbdense;
1867 
1868   PetscFunctionBegin;
1869   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1870   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1871   ierr = PetscFree(atb);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1876 {
1877   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1878   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1879   Mat_TransMatMultDense *atb = c->atbdense;
1880   PetscErrorCode ierr;
1881   MPI_Comm       comm;
1882   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1883   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1884   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1885   PetscScalar    _DOne=1.0,_DZero=0.0;
1886   PetscBLASInt   am,an,bn,aN;
1887   const PetscInt *ranges;
1888 
1889   PetscFunctionBegin;
1890   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1891   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1892   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1893 
1894   /* compute atbarray = aseq^T * bseq */
1895   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1896   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1897   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1898   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1899   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1900 
1901   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1902   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1903 
1904   /* arrange atbarray into sendbuf */
1905   k = 0;
1906   for (proc=0; proc<size; proc++) {
1907     for (j=0; j<cN; j++) {
1908       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1909     }
1910   }
1911   /* sum all atbarray to local values of C */
1912   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1913   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1914   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1919 {
1920   PetscErrorCode        ierr;
1921   Mat                   Cdense;
1922   MPI_Comm              comm;
1923   PetscMPIInt           size;
1924   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1925   Mat_MPIDense          *c;
1926   Mat_TransMatMultDense *atb;
1927 
1928   PetscFunctionBegin;
1929   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1930   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1931     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1932   }
1933 
1934   /* create matrix product Cdense */
1935   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1936   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1937   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1938   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1939   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1940   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1941   *C   = Cdense;
1942 
1943   /* create data structure for reuse Cdense */
1944   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1945   ierr = PetscNew(&atb);CHKERRQ(ierr);
1946   cM = Cdense->rmap->N;
1947   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1948 
1949   c                    = (Mat_MPIDense*)Cdense->data;
1950   c->atbdense          = atb;
1951   atb->destroy         = Cdense->ops->destroy;
1952   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1957 {
1958   PetscErrorCode ierr;
1959 
1960   PetscFunctionBegin;
1961   if (scall == MAT_INITIAL_MATRIX) {
1962     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1963   }
1964   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1969 {
1970   PetscErrorCode   ierr;
1971   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1972   Mat_MatMultDense *ab = a->abdense;
1973 
1974   PetscFunctionBegin;
1975   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1976   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1977   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1978 
1979   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1980   ierr = PetscFree(ab);CHKERRQ(ierr);
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 #if defined(PETSC_HAVE_ELEMENTAL)
1985 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1986 {
1987   PetscErrorCode   ierr;
1988   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1989   Mat_MatMultDense *ab=c->abdense;
1990 
1991   PetscFunctionBegin;
1992   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1993   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1994   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1995   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2000 {
2001   PetscErrorCode   ierr;
2002   Mat              Ae,Be,Ce;
2003   Mat_MPIDense     *c;
2004   Mat_MatMultDense *ab;
2005 
2006   PetscFunctionBegin;
2007   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2008     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2009   }
2010 
2011   /* convert A and B to Elemental matrices Ae and Be */
2012   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2013   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2014 
2015   /* Ce = Ae*Be */
2016   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2017   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2018 
2019   /* convert Ce to C */
2020   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2021 
2022   /* create data structure for reuse Cdense */
2023   ierr = PetscNew(&ab);CHKERRQ(ierr);
2024   c                  = (Mat_MPIDense*)(*C)->data;
2025   c->abdense         = ab;
2026 
2027   ab->Ae             = Ae;
2028   ab->Be             = Be;
2029   ab->Ce             = Ce;
2030   ab->destroy        = (*C)->ops->destroy;
2031   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2032   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2037 {
2038   PetscErrorCode ierr;
2039 
2040   PetscFunctionBegin;
2041   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
2042     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2043     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2044     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2045   } else {
2046     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2047     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2048     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2049   }
2050   PetscFunctionReturn(0);
2051 }
2052 #endif
2053 
2054