/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the
   sequential matrix that stores the locally owned rows of the operator. For a
   sequential dense matrix it returns the matrix itself.

   Not Collective

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

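/*
   Example usage (a minimal sketch, not taken from PETSc itself): obtain the local
   SeqDense block of an assembled MATMPIDENSE matrix A and scale just the locally
   owned rows. Aloc references the internal matrix, not a copy, so the caller must
   not destroy it.

     Mat Aloc;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatScale(Aloc,2.0);CHKERRQ(ierr);
*/
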
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

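/*
   Example usage (a minimal sketch): fetch the square on-diagonal block of a
   distributed matrix through the public interface. The block is cached on A
   with PetscObjectCompose(), so the caller must not destroy it.

     Mat Ad;
     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
*/
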
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

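/*
   Example usage (a minimal sketch): entries are addressed by global indices and
   may target rows owned by another process; such entries are kept in the stash
   and communicated during assembly, unless MAT_IGNORE_OFF_PROC_ENTRIES was set.

     PetscInt    row = 0, col = 0;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
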
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are silently skipped */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Each index set should really
     be checked to confirm that it is OK. Currently only submatrices with the same
     row partitioning as the original matrix are supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* Really need to check that the reused matrix has the right row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access the
   local matrix mdn->A directly rather than going through the
   MatZeroRows() routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

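/*
   Example usage (a minimal sketch): zero two global rows (they need not be
   locally owned; off-process rows are communicated to their owners above), put
   1.0 on the diagonal, and adjust the right-hand side b so the solution keeps
   the values of x in those rows. Pass NULL for x and b to skip that fix-up.

     PetscInt rows[] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/
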
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

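/* The parallel products below share one pattern: MatMult and MatMultAdd scatter
   the distributed input vector into the locally stored work vector lvec through
   the Mvctx scatter and then apply the sequential dense kernel to the local rows;
   the transposed products apply the sequential kernel first and then run the
   scatter in reverse with ADD_VALUES so that every process contributes its part
   of the result. */
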
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: when data is non-NULL, this assumes the user has correctly allocated
     the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

1299 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1300 {
1301   Mat_MPIDense   *a;
1302   PetscErrorCode ierr;
1303 
1304   PetscFunctionBegin;
1305   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1306   mat->data = (void*)a;
1307   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1308 
1309   mat->insertmode = NOT_SET_VALUES;
1310   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1311   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1312 
1313   /* build cache for off array entries formed */
1314   a->donotstash = PETSC_FALSE;
1315 
1316   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1317 
1318   /* stuff used for matrix vector multiply */
1319   a->lvec        = 0;
1320   a->Mvctx       = 0;
1321   a->roworiented = PETSC_TRUE;
1322 
1323   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1324   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1325   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1326   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1327   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1328   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1329 #if defined(PETSC_HAVE_ELEMENTAL)
1330   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1331 #endif
1332   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1333   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1334   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1335   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1336 
1337   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1338   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1339   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1340   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
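/*
   The "*_C" methods composed above are looked up by their string names at
   runtime; PetscTryMethod()/PetscUseMethod() used elsewhere in this file are
   built on this mechanism. A minimal sketch (illustrative only) of querying
   and invoking one of them directly:

     PetscErrorCode (*f)(Mat,PetscScalar*) = NULL;
     ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(mat,NULL);CHKERRQ(ierr); }
*/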
1343 
1344 /*MC
1345    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1346 
1347    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1348    and MATMPIDENSE otherwise.
1349 
1350    Options Database Keys:
1351 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1352 
1353   Level: beginner
1354 
1355 
1356 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1357 M*/
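/*
   An illustrative sketch (not part of the library source) of selecting the
   dense type at runtime via the options database; M and N are placeholders
   for the desired global sizes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);

   Running with -mat_type dense then yields MATSEQDENSE on one process and
   MATMPIDENSE on several.
*/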
1358 
1359 /*@C
1360    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1361 
1362    Not Collective
1363 
1364    Input Parameters:
1365 +  B - the matrix
1366 -  data - optional location of matrix data.  Set data=NULL for PETSc
1367    to control all matrix memory allocation.
1368 
1369    Notes:
1370    The dense format is fully compatible with standard Fortran 77
1371    storage by columns.
1372 
1373    The data input variable is intended primarily for Fortran programmers
1374    who wish to allocate their own matrix memory space.  Most users should
1375    set data=NULL.
1376 
1377    Level: intermediate
1378 
1379 .keywords: matrix,dense, parallel
1380 
1381 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1382 @*/
1383 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1384 {
1385   PetscErrorCode ierr;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
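/*
   A usage sketch (illustrative only): each process stores its full row slab,
   i.e. (local rows) x (global columns) entries in column-major order. Passing
   data=NULL lets PETSc allocate that storage:

     Mat B;
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);

   Alternatively, pass a user-allocated array of m*N entries as data; PETSc
   will then use it without freeing it.
*/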
1391 
1392 /*@
1393    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1394    array provided by the user. This is useful to avoid copying an array
1395    into a matrix
1396 
1397    Not Collective
1398 
1399    Input Parameters:
1400 +  mat - the matrix
1401 -  array - the array in column major order
1402 
1403    Notes:
1404    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1405    freed when the matrix is destroyed.
1406 
1407    Level: developer
1408 
1409 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1410 
1411 @*/
1412 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1413 {
1414   PetscErrorCode ierr;
1415   PetscFunctionBegin;
1416   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1417   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 /*@
1422    MatDenseResetArray - Resets the matrix array to the array that it held before the call to MatDensePlaceArray()
1423 
1424    Not Collective
1425 
1426    Input Parameter:
1427 .  mat - the matrix
1428 
1429    Notes:
1430    You can only call this after a call to MatDensePlaceArray()
1431 
1432    Level: developer
1433 
1434 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1435 
1436 @*/
1437 PetscErrorCode  MatDenseResetArray(Mat mat)
1438 {
1439   PetscErrorCode ierr;
1440   PetscFunctionBegin;
1441   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1442   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
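/*
   A usage sketch (illustrative only) combining MatDensePlaceArray() and
   MatDenseResetArray(): temporarily back the local dense block with user
   storage of (local rows) x (global columns) entries in column-major order;
   myarray is a placeholder name.

     PetscScalar *myarray;
     ierr = PetscMalloc1(m*N,&myarray);CHKERRQ(ierr);
     ierr = MatDensePlaceArray(A,myarray);CHKERRQ(ierr);
        ... operations on A now read and write myarray ...
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
     ierr = PetscFree(myarray);CHKERRQ(ierr);
*/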
1445 
1446 /*@C
1447    MatCreateDense - Creates a matrix in dense format: MATSEQDENSE on a single process, MATMPIDENSE in parallel.
1448 
1449    Collective on MPI_Comm
1450 
1451    Input Parameters:
1452 +  comm - MPI communicator
1453 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1454 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1455 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1456 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1457 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1458    to control all matrix memory allocation.
1459 
1460    Output Parameter:
1461 .  A - the matrix
1462 
1463    Notes:
1464    The dense format is fully compatible with standard Fortran 77
1465    storage by columns.
1466 
1467    The data input variable is intended primarily for Fortran programmers
1468    who wish to allocate their own matrix memory space.  Most users should
1469    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1470 
1471    The user MUST specify either the local or global matrix dimensions
1472    (possibly both).
1473 
1474    Level: intermediate
1475 
1476 .keywords: matrix,dense, parallel
1477 
1478 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1479 @*/
1480 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1481 {
1482   PetscErrorCode ierr;
1483   PetscMPIInt    size;
1484 
1485   PetscFunctionBegin;
1486   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1487   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1488   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1489   if (size > 1) {
1490     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1491     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1492     if (data) {  /* user provided data array, so no need to assemble */
1493       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1494       (*A)->assembled = PETSC_TRUE;
1495     }
1496   } else {
1497     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1498     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1499   }
1500   PetscFunctionReturn(0);
1501 }
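/*
   A minimal usage sketch (illustrative only, using the standard petscmat.h
   API): create a distributed dense matrix with PETSc-chosen local sizes, set
   the locally owned entries, and assemble.

     Mat            A;
     PetscInt       i,j,rstart,rend,N = 10;
     PetscScalar    v;
     PetscErrorCode ierr;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         v    = 1.0/(i+j+1);
         ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/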
1502 
1503 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1504 {
1505   Mat            mat;
1506   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   *newmat = 0;
1511   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1512   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1513   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1514   a       = (Mat_MPIDense*)mat->data;
1515   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1516 
1517   mat->factortype   = A->factortype;
1518   mat->assembled    = PETSC_TRUE;
1519   mat->preallocated = PETSC_TRUE;
1520 
1521   a->size         = oldmat->size;
1522   a->rank         = oldmat->rank;
1523   mat->insertmode = NOT_SET_VALUES;
1524   a->nvec         = oldmat->nvec;
1525   a->donotstash   = oldmat->donotstash;
1526 
1527   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1528   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1529 
1530   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1531   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1532   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1533 
1534   *newmat = mat;
1535   PetscFunctionReturn(0);
1536 }
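/*
   For example (an illustrative sketch), duplicating an assembled matrix A:

     Mat B,C;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C);CHKERRQ(ierr);

   Both B and C inherit A's parallel layout; only B receives A's values.
*/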
1537 
1538 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1539 {
1540   PetscErrorCode ierr;
1541   PetscMPIInt    rank,size;
1542   const PetscInt *rowners;
1543   PetscInt       i,m,n,nz,j,mMax;
1544   PetscScalar    *array,*vals,*vals_ptr;
1545   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1546 
1547   PetscFunctionBegin;
1548   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1550 
1551   /* determine ownership of rows and columns */
1552   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1553   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1554 
1555   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1556   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1557     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1558   }
1559   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1560   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1561   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1562   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1563   if (!rank) {
1564     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1565 
1566     /* read in my part of the matrix numerical values  */
1567     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1568 
1569     /* insert into the matrix row by row; the file stores values by rows while the local array is stored by columns, so we cannot read directly into the array */
1570     vals_ptr = vals;
1571     for (i=0; i<m; i++) {
1572       for (j=0; j<N; j++) {
1573         array[i + j*m] = *vals_ptr++;
1574       }
1575     }
1576 
1577     /* read in the other processes' parts and ship them out */
1578     for (i=1; i<size; i++) {
1579       nz   = (rowners[i+1] - rowners[i])*N;
1580       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1581       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1582     }
1583   } else {
1584     /* receive numeric values */
1585     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1586 
1587     /* receive message of values */
1588     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1589 
1590     /* insert into the matrix row by row; the file stores values by rows while the local array is stored by columns, so we cannot read directly into the array */
1591     vals_ptr = vals;
1592     for (i=0; i<m; i++) {
1593       for (j=0; j<N; j++) {
1594         array[i + j*m] = *vals_ptr++;
1595       }
1596     }
1597   }
1598   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1599   ierr = PetscFree(vals);CHKERRQ(ierr);
1600   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1601   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1602   PetscFunctionReturn(0);
1603 }
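/*
   The copy loops above reorder storage: the binary file holds each process's
   block by rows, while the local dense array is column-major. A small worked
   example (illustrative) with m = 2 local rows and N = 3 columns:

     file order  : a00 a01 a02 a10 a11 a12          (row by row)
     array layout: array[0]=a00  array[1]=a10       (column 0)
                   array[2]=a01  array[3]=a11       (column 1)
                   array[4]=a02  array[5]=a12       (column 2)

   i.e. entry (i,j) lands at array[i + j*m], as in the loops above.
*/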
1604 
1605 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1606 {
1607   Mat_MPIDense   *a;
1608   PetscScalar    *vals,*svals;
1609   MPI_Comm       comm;
1610   MPI_Status     status;
1611   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1612   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1613   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1614   PetscInt       i,nz,j,rstart,rend;
1615   int            fd;
1616   PetscErrorCode ierr;
1617 
1618   PetscFunctionBegin;
1619   /* force binary viewer to load .info file if it has not yet done so */
1620   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1621   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1622   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1623   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1624   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1625   if (!rank) {
1626     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1627     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1628   }
1629   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1630   M    = header[1]; N = header[2]; nz = header[3];
1631 
1632   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1633   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1634   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1635 
1636   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1637   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1638 
1639   /*
1640        Handle case where matrix is stored on disk as a dense matrix
1641   */
1642   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1643     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1644     PetscFunctionReturn(0);
1645   }
1646 
1647   /* determine ownership of all rows */
1648   if (newmat->rmap->n < 0) {
1649     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1650   } else {
1651     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1652   }
1653   if (newmat->cmap->n < 0) {
1654     n = PETSC_DECIDE;
1655   } else {
1656     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1657   }
1658 
1659   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1660   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1661   rowners[0] = 0;
1662   for (i=2; i<=size; i++) {
1663     rowners[i] += rowners[i-1];
1664   }
1665   rstart = rowners[rank];
1666   rend   = rowners[rank+1];
1667 
1668   /* distribute row lengths to all processors */
1669   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1670   if (!rank) {
1671     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1672     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1673     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1674     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1675     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1676     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1677   } else {
1678     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1679   }
1680 
1681   if (!rank) {
1682     /* calculate the number of nonzeros on each processor */
1683     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1684     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1685     for (i=0; i<size; i++) {
1686       for (j=rowners[i]; j< rowners[i+1]; j++) {
1687         procsnz[i] += rowlengths[j];
1688       }
1689     }
1690     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1691 
1692     /* determine max buffer needed and allocate it */
1693     maxnz = 0;
1694     for (i=0; i<size; i++) {
1695       maxnz = PetscMax(maxnz,procsnz[i]);
1696     }
1697     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1698 
1699     /* read in my part of the matrix column indices  */
1700     nz   = procsnz[0];
1701     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1702     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1703 
1704     /* read in everyone else's parts and ship them off */
1705     for (i=1; i<size; i++) {
1706       nz   = procsnz[i];
1707       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1708       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1709     }
1710     ierr = PetscFree(cols);CHKERRQ(ierr);
1711   } else {
1712     /* determine buffer space needed for message */
1713     nz = 0;
1714     for (i=0; i<m; i++) {
1715       nz += ourlens[i];
1716     }
1717     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1718 
1719     /* receive message of column indices */
1720     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1721     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1722     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1723   }
1724 
1725   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1726   a = (Mat_MPIDense*)newmat->data;
1727   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1728     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1729   }
1730 
1731   if (!rank) {
1732     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1733 
1734     /* read in my part of the matrix numerical values  */
1735     nz   = procsnz[0];
1736     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1737 
1738     /* insert into matrix */
1739     jj      = rstart;
1740     smycols = mycols;
1741     svals   = vals;
1742     for (i=0; i<m; i++) {
1743       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1744       smycols += ourlens[i];
1745       svals   += ourlens[i];
1746       jj++;
1747     }
1748 
1749     /* read in the other processes' parts and ship them out */
1750     for (i=1; i<size; i++) {
1751       nz   = procsnz[i];
1752       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1753       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1754     }
1755     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1756   } else {
1757     /* receive numeric values */
1758     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1759 
1760     /* receive message of values*/
1761     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1762     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1763     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1764 
1765     /* insert into matrix */
1766     jj      = rstart;
1767     smycols = mycols;
1768     svals   = vals;
1769     for (i=0; i<m; i++) {
1770       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1771       smycols += ourlens[i];
1772       svals   += ourlens[i];
1773       jj++;
1774     }
1775   }
1776   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1777   ierr = PetscFree(vals);CHKERRQ(ierr);
1778   ierr = PetscFree(mycols);CHKERRQ(ierr);
1779   ierr = PetscFree(rowners);CHKERRQ(ierr);
1780 
1781   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1782   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
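/*
   A usage sketch (illustrative only; "matrix.dat" is a placeholder filename):
   loading a matrix stored in PETSc binary format into an MPIDENSE matrix.
   The routine above handles both on-disk layouts: a dense layout (routed to
   MatLoad_MPIDense_DenseInFile()) and the standard sparse layout.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/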
1785 
1786 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1787 {
1788   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1789   Mat            a,b;
1790   PetscBool      flg;
1791   PetscErrorCode ierr;
1792 
1793   PetscFunctionBegin;
1794   a    = matA->A;
1795   b    = matB->A;
1796   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1797   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1802 {
1803   PetscErrorCode        ierr;
1804   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1805   Mat_TransMatMultDense *atb = a->atbdense;
1806 
1807   PetscFunctionBegin;
1808   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1809   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1810   ierr = PetscFree(atb);CHKERRQ(ierr);
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1815 {
1816   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1817   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1818   Mat_TransMatMultDense *atb = c->atbdense;
1819   PetscErrorCode ierr;
1820   MPI_Comm       comm;
1821   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1822   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1823   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1824   PetscScalar    _DOne=1.0,_DZero=0.0;
1825   PetscBLASInt   am,an,bn,aN;
1826   const PetscInt *ranges;
1827 
1828   PetscFunctionBegin;
1829   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1830   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1831   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1832 
1833   /* compute atbarray = aseq^T * bseq */
1834   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1835   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1836   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1837   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1838   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1839 
1840   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1841   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1842 
1843   /* arrange atbarray into sendbuf */
1844   k = 0;
1845   for (proc=0; proc<size; proc++) {
1846     for (j=0; j<cN; j++) {
1847       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1848     }
1849   }
1850   /* sum the atbarray contributions into the local values of C */
1851   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1852   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1853   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1858 {
1859   PetscErrorCode        ierr;
1860   Mat                   Cdense;
1861   MPI_Comm              comm;
1862   PetscMPIInt           size;
1863   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1864   Mat_MPIDense          *c;
1865   Mat_TransMatMultDense *atb;
1866 
1867   PetscFunctionBegin;
1868   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1869   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1870     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1871   }
1872 
1873   /* create matrix product Cdense */
1874   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1875   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1876   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1877   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1878   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880   *C   = Cdense;
1881 
1882   /* create data structure for reusing Cdense */
1883   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1884   ierr = PetscNew(&atb);CHKERRQ(ierr);
1885   cM = Cdense->rmap->N;
1886   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1887 
1888   c                    = (Mat_MPIDense*)Cdense->data;
1889   c->atbdense          = atb;
1890   atb->destroy         = Cdense->ops->destroy;
1891   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1896 {
1897   PetscErrorCode ierr;
1898 
1899   PetscFunctionBegin;
1900   if (scall == MAT_INITIAL_MATRIX) {
1901     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1902   }
1903   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1904   PetscFunctionReturn(0);
1905 }
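/*
   A usage sketch (illustrative only): forming C = A^T * B for two MPIDENSE
   matrices with identical row layouts (as required by the symbolic routine
   above), reusing C on subsequent calls.

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
        ... update the entries of A or B, keeping the same sizes ...
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/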
1906 
1907 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1908 {
1909   PetscErrorCode   ierr;
1910   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1911   Mat_MatMultDense *ab = a->abdense;
1912 
1913   PetscFunctionBegin;
1914   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1915   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1916   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1917 
1918   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1919   ierr = PetscFree(ab);CHKERRQ(ierr);
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #if defined(PETSC_HAVE_ELEMENTAL)
1924 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1925 {
1926   PetscErrorCode   ierr;
1927   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1928   Mat_MatMultDense *ab=c->abdense;
1929 
1930   PetscFunctionBegin;
1931   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1932   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1933   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1934   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1939 {
1940   PetscErrorCode   ierr;
1941   Mat              Ae,Be,Ce;
1942   Mat_MPIDense     *c;
1943   Mat_MatMultDense *ab;
1944 
1945   PetscFunctionBegin;
1946   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1947     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1948   }
1949 
1950   /* convert A and B to Elemental matrices Ae and Be */
1951   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1952   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1953 
1954   /* Ce = Ae*Be */
1955   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1956   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1957 
1958   /* convert Ce to C */
1959   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1960 
1961   /* create data structure for reusing C */
1962   ierr = PetscNew(&ab);CHKERRQ(ierr);
1963   c                  = (Mat_MPIDense*)(*C)->data;
1964   c->abdense         = ab;
1965 
1966   ab->Ae             = Ae;
1967   ab->Be             = Be;
1968   ab->Ce             = Ce;
1969   ab->destroy        = (*C)->ops->destroy;
1970   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1971   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1976 {
1977   PetscErrorCode ierr;
1978 
1979   PetscFunctionBegin;
1980   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
1981     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1982     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1983     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1984   } else {
1985     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1986     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1987     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 #endif
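/*
   A usage sketch (illustrative only, requires a PETSc build configured with
   Elemental, since the dense-dense product above is routed through Elemental):
   forming C = A * B and reusing C on subsequent calls.

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
        ... update the entries of A or B, keeping the same sizes ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/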
1992