
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that represents the local portion of the operator; for a
              MATSEQDENSE matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

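    Example usage (a minimal sketch; M and N are the desired global sizes,
    and error checking is omitted):
.vb
      Mat A,Aloc;
      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
      MatDenseGetLocalMatrix(A,&Aloc); /* Aloc stores this process's rows */
.ve
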
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
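    /* create a SeqDense matrix that aliases columns [rstart,rstart+m) of the
       local column-major array, so the diagonal block shares storage with A
       (this assumes the local array is stored with lda equal to m) */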
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
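      /* off-process row: cache the values in the stash; they are communicated
         and applied during MatAssemblyBegin()/MatAssemblyEnd() */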
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

186 
187 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
188 {
189   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
190   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
191   PetscErrorCode ierr;
192   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
193   const PetscInt *irow,*icol;
194   PetscScalar    *av,*bv,*v = lmat->v;
195   Mat            newmat;
196   IS             iscol_local;
197 
198   PetscFunctionBegin;
199   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
200   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
201   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
202   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
203   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
204   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
205 
206   /* No parallel redistribution currently supported! Should really check each index set
207      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
208      original matrix! */
209 
210   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
211   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
212 
213   /* Check submatrix call */
214   if (scall == MAT_REUSE_MATRIX) {
215     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
216     /* Really need to test rows and column sizes! */
217     newmat = *B;
218   } else {
219     /* Create and fill new matrix */
220     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
221     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
222     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
223     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
224   }
225 
226   /* Now extract the data pointers and do the copy, column at a time */
227   newmatd = (Mat_MPIDense*)newmat->data;
228   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
229 
230   for (i=0; i<Ncols; i++) {
231     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
232     for (j=0; j<nrows; j++) {
233       *bv++ = av[irow[j] - rstart];
234     }
235   }
236 
237   /* Assemble the matrices so that the correct flags are set */
238   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240 
241   /* Free work space */
242   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
243   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
244   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
245   *B   = newmat;
246   PetscFunctionReturn(0);
247 }

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processes use the same mode (INSERT_VALUES or ADD_VALUES);
     the bitwise OR of the modes detects a mix, since ADD_VALUES|INSERT_VALUES
     equals neither value */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
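  /* sizes[2*j] counts the rows this process sends to process j; sizes[2*j+1] is
     a 0/1 flag marking that a message goes to j (PetscMaxSum() reduces these
     pairs to obtain the number of messages received and the max message length) */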
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
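  /* gather the entire distributed input vector into the local work vector lvec,
     then apply this process's (local-rows by N) dense block */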
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
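  /* each process applies the transpose of its local block to its part of x,
     producing a full-length partial result in lvec; the reverse scatter with
     ADD_VALUES then sums these partial results into the distributed output */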
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
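  /* the local diagonal entries start at column rstart of the local column-major
     array (offset radd = rstart*m); successive diagonal entries are m+1 scalars
     apart (this assumes the array is stored with lda equal to m) */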
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
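      /* NORM_1 is the maximum column sum: accumulate the local column sums,
         sum them across all processes, then take the maximum over the columns */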
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
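  /* the local array stores all N global columns of this process's m rows;
     column j (contiguous, length m) supplies this process's contribution to
     global row j of the transpose, at columns rstart..rstart+m-1 */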
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
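    /* square the local 2-norms so they can be summed across processes;
       the square root is taken after the reduction below */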
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

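/* A minimal usage sketch for the preallocation routine below (error checking
   omitted; comm, m, n, M, N as chosen by the caller). Passing data = NULL lets
   PETSc allocate the local storage:

     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,NULL);
*/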
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is provided, this assumes the caller has correctly
     allocated the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* the PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1352 
1353 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1354 {
1355   Mat_MPIDense   *a;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1360   mat->data = (void*)a;
1361   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1362 
1363   mat->insertmode = NOT_SET_VALUES;
1364   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1365   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1366 
1367   /* build a cache for off-processor entries as they are set */
1368   a->donotstash = PETSC_FALSE;
1369 
1370   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1371 
1372   /* data used for matrix-vector multiply */
1373   a->lvec        = 0;
1374   a->Mvctx       = 0;
1375   a->roworiented = PETSC_TRUE;
1376 
1377   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1378   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1379   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1380   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1381   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1382   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1383 #if defined(PETSC_HAVE_ELEMENTAL)
1384   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1385 #endif
1386   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1388   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1390 
1391   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1394   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1395   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1396   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 /*MC
1401    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1402 
1403    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1404    and MATMPIDENSE otherwise.
1405 
1406    Options Database Keys:
1407 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
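
   A minimal sketch of selecting this type in code (equivalent to the option above; assumes the usual Mat setup):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATDENSE);
.ve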
1408 
1409   Level: beginner
1410 
1411 
1412 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1413 M*/
1414 
1415 /*@C
1416    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1417 
1418    Not Collective
1419 
1420    Input Parameters:
1421 .  B - the matrix
1422 -  data - optional location of matrix data.  Set data=NULL for PETSc
1423    to control all matrix memory allocation.
1424 
1425    Notes:
1426    The dense format is fully compatible with standard Fortran 77
1427    storage by columns.
1428 
1429    The data input variable is intended primarily for Fortran programmers
1430    who wish to allocate their own matrix memory space.  Most users should
1431    set data=NULL.
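
   A minimal calling sketch with PETSc managing the storage (data=NULL):
.vb
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,M,N);
     MatSetType(B,MATMPIDENSE);
     MatMPIDenseSetPreallocation(B,NULL);
.ve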
1432 
1433    Level: intermediate
1434 
1435 .keywords: matrix, dense, parallel
1436 
1437 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1438 @*/
1439 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1440 {
1441   PetscErrorCode ierr;
1442 
1443   PetscFunctionBegin;
1444   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 /*@
1449    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1450    array provided by the user. This is useful to avoid copying an array
1451    into a matrix.
1452 
1453    Not Collective
1454 
1455    Input Parameters:
1456 +  mat - the matrix
1457 -  array - the array in column major order
1458 
1459    Notes:
1460    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1461    freed when the matrix is destroyed.
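
   A sketch of temporarily wrapping a user-provided buffer (assumes array holds the local entries in column major order):
.vb
     MatDensePlaceArray(mat,array);  /* mat now operates on the user buffer */
     /* ... use mat ... */
     MatDenseResetArray(mat);        /* mat returns to its original storage */
.ve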
1462 
1463    Level: developer
1464 
1465 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1466 
1467 @*/
1468 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1469 {
1470   PetscErrorCode ierr;
1471   PetscFunctionBegin;
1472   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1473   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 /*@
1478    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1479 
1480    Not Collective
1481 
1482    Input Parameter:
1483 .  mat - the matrix
1484 
1485    Notes:
1486    You can only call this after a call to MatDensePlaceArray().
1487 
1488    Level: developer
1489 
1490 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1491 
1492 @*/
1493 PetscErrorCode  MatDenseResetArray(Mat mat)
1494 {
1495   PetscErrorCode ierr;
1496   PetscFunctionBegin;
1497   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1498   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 /*@C
1503    MatCreateDense - Creates a parallel matrix in dense format.
1504 
1505    Collective on MPI_Comm
1506 
1507    Input Parameters:
1508 +  comm - MPI communicator
1509 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1510 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1511 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1512 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1513 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1514    to control all matrix memory allocation.
1515 
1516    Output Parameter:
1517 .  A - the matrix
1518 
1519    Notes:
1520    The dense format is fully compatible with standard Fortran 77
1521    storage by columns.
1522 
1523    The data input variable is intended primarily for Fortran programmers
1524    who wish to allocate their own matrix memory space.  Most users should
1525    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1526 
1527    The user MUST specify either the local or global matrix dimensions
1528    (possibly both).
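
   For example, a sketch that lets PETSc choose the parallel layout and allocate the storage:
.vb
     MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
.ve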
1529 
1530    Level: intermediate
1531 
1532 .keywords: matrix, dense, parallel
1533 
1534 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1535 @*/
1536 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1537 {
1538   PetscErrorCode ierr;
1539   PetscMPIInt    size;
1540 
1541   PetscFunctionBegin;
1542   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1543   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1545   if (size > 1) {
1546     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1547     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1548     if (data) {  /* user provided data array, so no need to assemble */
1549       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1550       (*A)->assembled = PETSC_TRUE;
1551     }
1552   } else {
1553     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1554     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1560 {
1561   Mat            mat;
1562   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   *newmat = 0;
1567   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1568   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1569   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1570   a       = (Mat_MPIDense*)mat->data;
1571   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1572 
1573   mat->factortype   = A->factortype;
1574   mat->assembled    = PETSC_TRUE;
1575   mat->preallocated = PETSC_TRUE;
1576 
1577   a->size         = oldmat->size;
1578   a->rank         = oldmat->rank;
1579   mat->insertmode = NOT_SET_VALUES;
1580   a->nvec         = oldmat->nvec;
1581   a->donotstash   = oldmat->donotstash;
1582 
1583   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1584   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1585 
1586   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1587   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1588   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1589 
1590   *newmat = mat;
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1595 {
1596   PetscErrorCode ierr;
1597   PetscMPIInt    rank,size;
1598   const PetscInt *rowners;
1599   PetscInt       i,m,n,nz,j,mMax;
1600   PetscScalar    *array,*vals,*vals_ptr;
1601   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1602 
1603   PetscFunctionBegin;
1604   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1605   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1606 
1607   /* determine ownership of rows and columns */
1608   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1609   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1610 
1611   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1612   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1613     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1614   }
1615   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1616   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1617   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1618   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1619   if (!rank) {
1620     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1621 
1622     /* read in my part of the matrix numerical values  */
1623     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1624 
1625     /* insert into the matrix by row; the file stores values by row but the dense array is column major, so we cannot read the file directly into array */
1626     vals_ptr = vals;
1627     for (i=0; i<m; i++) {
1628       for (j=0; j<N; j++) {
1629         array[i + j*m] = *vals_ptr++;
1630       }
1631     }
1632 
1633     /* read in other processors and ship out */
1634     for (i=1; i<size; i++) {
1635       nz   = (rowners[i+1] - rowners[i])*N;
1636       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1637       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1638     }
1639   } else {
1640     /* receive numeric values */
1641     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1642 
1643     /* receive message of values */
1644     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1645 
1646     /* insert into the matrix by row; the file stores values by row but the dense array is column major, so we cannot read the message directly into array */
1647     vals_ptr = vals;
1648     for (i=0; i<m; i++) {
1649       for (j=0; j<N; j++) {
1650         array[i + j*m] = *vals_ptr++;
1651       }
1652     }
1653   }
1654   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1655   ierr = PetscFree(vals);CHKERRQ(ierr);
1656   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1657   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1662 {
1663   Mat_MPIDense   *a;
1664   PetscScalar    *vals,*svals;
1665   MPI_Comm       comm;
1666   MPI_Status     status;
1667   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1668   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1669   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1670   PetscInt       i,nz,j,rstart,rend;
1671   int            fd;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   /* force binary viewer to load .info file if it has not yet done so */
1676   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1677   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1678   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1679   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1680   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1681   if (!rank) {
1682     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1683     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1684   }
1685   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1686   M    = header[1]; N = header[2]; nz = header[3];
1687 
1688   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1689   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1690   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1691 
1692   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1693   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1694 
1695   /*
1696        Handle case where matrix is stored on disk as a dense matrix
1697   */
1698   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1699     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1700     PetscFunctionReturn(0);
1701   }
1702 
1703   /* determine ownership of all rows */
1704   if (newmat->rmap->n < 0) {
1705     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1706   } else {
1707     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1708   }
1709   if (newmat->cmap->n < 0) {
1710     n = PETSC_DECIDE;
1711   } else {
1712     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1713   }
1714 
1715   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1716   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
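  /* turn the gathered local row counts into a running sum so rowners[] holds the row ownership ranges */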
1717   rowners[0] = 0;
1718   for (i=2; i<=size; i++) {
1719     rowners[i] += rowners[i-1];
1720   }
1721   rstart = rowners[rank];
1722   rend   = rowners[rank+1];
1723 
1724   /* distribute row lengths to all processors */
1725   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1726   if (!rank) {
1727     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1728     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1729     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1730     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1731     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1732     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1733   } else {
1734     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1735   }
1736 
1737   if (!rank) {
1738     /* calculate the number of nonzeros on each processor */
1739     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1740     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1741     for (i=0; i<size; i++) {
1742       for (j=rowners[i]; j< rowners[i+1]; j++) {
1743         procsnz[i] += rowlengths[j];
1744       }
1745     }
1746     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1747 
1748     /* determine max buffer needed and allocate it */
1749     maxnz = 0;
1750     for (i=0; i<size; i++) {
1751       maxnz = PetscMax(maxnz,procsnz[i]);
1752     }
1753     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1754 
1755     /* read in my part of the matrix column indices  */
1756     nz   = procsnz[0];
1757     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1758     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1759 
1760     /* read in everyone else's parts and ship them off */
1761     for (i=1; i<size; i++) {
1762       nz   = procsnz[i];
1763       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1764       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1765     }
1766     ierr = PetscFree(cols);CHKERRQ(ierr);
1767   } else {
1768     /* determine buffer space needed for message */
1769     nz = 0;
1770     for (i=0; i<m; i++) {
1771       nz += ourlens[i];
1772     }
1773     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1774 
1775     /* receive message of column indices */
1776     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1777     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1778     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1779   }
1780 
1781   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1782   a = (Mat_MPIDense*)newmat->data;
1783   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1784     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1785   }
1786 
1787   if (!rank) {
1788     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1789 
1790     /* read in my part of the matrix numerical values  */
1791     nz   = procsnz[0];
1792     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1793 
1794     /* insert into matrix */
1795     jj      = rstart;
1796     smycols = mycols;
1797     svals   = vals;
1798     for (i=0; i<m; i++) {
1799       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1800       smycols += ourlens[i];
1801       svals   += ourlens[i];
1802       jj++;
1803     }
1804 
1805     /* read in other processors and ship out */
1806     for (i=1; i<size; i++) {
1807       nz   = procsnz[i];
1808       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1809       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1810     }
1811     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1812   } else {
1813     /* receive numeric values */
1814     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1815 
1816     /* receive message of values*/
1817     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1818     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1819     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1820 
1821     /* insert into matrix */
1822     jj      = rstart;
1823     smycols = mycols;
1824     svals   = vals;
1825     for (i=0; i<m; i++) {
1826       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1827       smycols += ourlens[i];
1828       svals   += ourlens[i];
1829       jj++;
1830     }
1831   }
1832   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1833   ierr = PetscFree(vals);CHKERRQ(ierr);
1834   ierr = PetscFree(mycols);CHKERRQ(ierr);
1835   ierr = PetscFree(rowners);CHKERRQ(ierr);
1836 
1837   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1838   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1843 {
1844   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1845   Mat            a,b;
1846   PetscBool      flg;
1847   PetscErrorCode ierr;
1848 
1849   PetscFunctionBegin;
1850   a    = matA->A;
1851   b    = matB->A;
1852   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1853   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1858 {
1859   PetscErrorCode        ierr;
1860   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1861   Mat_TransMatMultDense *atb = a->atbdense;
1862 
1863   PetscFunctionBegin;
1864   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1865   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1866   ierr = PetscFree(atb);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1871 {
1872   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1873   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1874   Mat_TransMatMultDense *atb = c->atbdense;
1875   PetscErrorCode ierr;
1876   MPI_Comm       comm;
1877   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1878   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1879   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1880   PetscScalar    _DOne=1.0,_DZero=0.0;
1881   PetscBLASInt   am,an,bn,aN;
1882   const PetscInt *ranges;
1883 
1884   PetscFunctionBegin;
1885   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
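  /* C = A^T * B is formed as the sum over ranks of (local A)^T * (local B); each rank computes a full-size contribution, then MPI_Reduce_scatter sums them and leaves each rank with its own rows of C */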
1888 
1889   /* compute this rank's full-size contribution atbarray = aseq^T * bseq */
1890   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1891   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1892   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1893   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1894   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1895 
1896   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1897   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1898 
1899   /* pack atbarray into sendbuf, grouped by destination rank so MPI_Reduce_scatter delivers contiguous row blocks */
1900   k = 0;
1901   for (proc=0; proc<size; proc++) {
1902     for (j=0; j<cN; j++) {
1903       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1904     }
1905   }
1906   /* sum all atbarray to local values of C */
1907   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1908   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1909   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1914 {
1915   PetscErrorCode        ierr;
1916   Mat                   Cdense;
1917   MPI_Comm              comm;
1918   PetscMPIInt           size;
1919   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1920   Mat_MPIDense          *c;
1921   Mat_TransMatMultDense *atb;
1922 
1923   PetscFunctionBegin;
1924   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1925   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1926     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1927   }
1928 
1929   /* create matrix product Cdense */
1930   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1931   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1932   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1933   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1934   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1935   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1936   *C   = Cdense;
1937 
1938   /* create data structure for reusing Cdense */
1939   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1940   ierr = PetscNew(&atb);CHKERRQ(ierr);
1941   cM = Cdense->rmap->N;
1942   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1943 
1944   c                    = (Mat_MPIDense*)Cdense->data;
1945   c->atbdense          = atb;
1946   atb->destroy         = Cdense->ops->destroy;
1947   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1952 {
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   if (scall == MAT_INITIAL_MATRIX) {
1957     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1958   }
1959   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1964 {
1965   PetscErrorCode   ierr;
1966   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1967   Mat_MatMultDense *ab = a->abdense;
1968 
1969   PetscFunctionBegin;
1970   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1971   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1972   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1973 
1974   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1975   ierr = PetscFree(ab);CHKERRQ(ierr);
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #if defined(PETSC_HAVE_ELEMENTAL)
1980 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1981 {
1982   PetscErrorCode   ierr;
1983   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1984   Mat_MatMultDense *ab=c->abdense;
1985 
1986   PetscFunctionBegin;
1987   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1988   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1989   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1990   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1995 {
1996   PetscErrorCode   ierr;
1997   Mat              Ae,Be,Ce;
1998   Mat_MPIDense     *c;
1999   Mat_MatMultDense *ab;
2000 
2001   PetscFunctionBegin;
2002   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2003     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2004   }
2005 
2006   /* convert A and B to Elemental matrices Ae and Be */
2007   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2008   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2009 
2010   /* Ce = Ae*Be */
2011   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2012   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2013 
2014   /* convert Ce to C */
2015   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2016 
2017   /* create data structure for reusing the product matrices */
2018   ierr = PetscNew(&ab);CHKERRQ(ierr);
2019   c                  = (Mat_MPIDense*)(*C)->data;
2020   c->abdense         = ab;
2021 
2022   ab->Ae             = Ae;
2023   ab->Be             = Be;
2024   ab->Ce             = Ce;
2025   ab->destroy        = (*C)->ops->destroy;
2026   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2027   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2032 {
2033   PetscErrorCode ierr;
2034 
2035   PetscFunctionBegin;
2036   if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product also performs the numeric product */
2037     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2038     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2039     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2040   } else {
2041     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2042     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2043     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2044   }
2045   PetscFunctionReturn(0);
2046 }
2047 #endif
2048 
2049