xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 00d931fe9835bef04c3bcd2a9a1bf118d64cc4c2)
1 
/*
   Basic operations for parallel (MPI) dense matrices, type MATMPIDENSE.
*/
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatDenseGetLocalMatrix"
13 /*@
14 
15       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
16               matrix that represents the operator. For sequential matrices it returns itself.
17 
18     Input Parameter:
19 .      A - the Seq or MPI dense matrix
20 
21     Output Parameter:
22 .      B - the inner matrix
23 
24     Level: intermediate
25 
26 @*/
27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
28 {
29   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
30   PetscErrorCode ierr;
31   PetscBool      flg;
32 
33   PetscFunctionBegin;
34   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
35   if (flg) *B = mat->A;
36   else *B = A;
37   PetscFunctionReturn(0);
38 }
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatGetRow_MPIDense"
42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
43 {
44   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
45   PetscErrorCode ierr;
46   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
47 
48   PetscFunctionBegin;
49   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
50   lrow = row - rstart;
51   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
52   PetscFunctionReturn(0);
53 }
54 
55 #undef __FUNCT__
56 #define __FUNCT__ "MatRestoreRow_MPIDense"
57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
58 {
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
63   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
64   PetscFunctionReturn(0);
65 }
66 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   Returns (and caches on A under the composed name "DiagonalBlock") a
   sequential dense matrix that ALIASES the diagonal block of A: the m x m
   submatrix whose rows and columns are owned by this process.  Only square
   matrices are supported.
*/
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  /* reuse a previously created diagonal block if one was cached on A */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B shares storage with mdn->A: it points at the column of the local
       (column-major) array where the diagonal block begins.
       NOTE(review): the offset m*rstart assumes the local leading dimension
       equals m -- confirm this holds when lda != m. */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing takes a reference; drop ours so A owns the cached block */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/*
   Inserts/adds an m x n block of values with global indices.  Locally owned
   rows go straight into the inner sequential matrix; rows owned elsewhere
   are queued in the stash and communicated during MatAssemblyBegin/End
   (unless donotstash is set).  v is interpreted row-major or column-major
   depending on the matrix's roworiented flag.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major v: row i is contiguous, one call covers the whole row */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-major v: entry (i,j) lives at v[i + j*m], set one at a time */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it for the assembly-time communication */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatGetValues_MPIDense"
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatDenseGetArray_MPIDense"
161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
/*
   Extracts the submatrix selected by isrow (locally owned row indices on
   each process) and iscol (distributed column indices, gathered onto every
   process here).  The result keeps the same row partitioning as the
   original matrix; no parallel redistribution is performed.
*/
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  /* every process needs the complete list of selected columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  /* column-major copy: for each selected global column, gather the selected
     locally owned rows.  NOTE(review): writing through bv sequentially
     assumes the destination's leading dimension equals nrows -- confirm. */
  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238 {
239   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
259   /* make sure all processors are either in INSERTMODE or ADDMODE */
260   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   Completes assembly: drains the stash messages received from other ranks,
   inserting each run of same-row entries with a single MatSetValues call,
   then assembles the inner sequential matrix and, on the first final
   assembly, builds the scatter machinery used by the multiply routines.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break; /* no more incoming stash messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* the scatter context is created only once, at the first final assembly */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/*
   Zeros the N global rows listed in rows[] (indices may refer to rows owned
   by any process) and puts diag on the diagonal of each zeroed row.  If both
   x and b are given, also sets b = diag*x for those rows.  Implemented by
   routing each requested row index to its owning process with point-to-point
   messages, then zeroing the received rows locally.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  /* sizes[2*j]   = number of rows destined for process j
     sizes[2*j+1] = 1 iff any message will be sent to process j */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  /* bucket the requested row indices by destination process */
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* the bucketing pass advanced starts[]; rebuild it to locate each bucket */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert the received global row indices to local row numbers */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    /* set the diagonal directly in the local column-major array (see the
       BUG note above: only correct for the supported square,
       identically-partitioned case) */
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMult_MPIDense"
466 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
467 {
468   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
469   PetscErrorCode ierr;
470 
471   PetscFunctionBegin;
472   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatMultAdd_MPIDense"
480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
483   PetscErrorCode ierr;
484 
485   PetscFunctionBegin;
486   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
487   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 #undef __FUNCT__
493 #define __FUNCT__ "MatMultTranspose_MPIDense"
494 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
495 {
496   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
497   PetscErrorCode ierr;
498   PetscScalar    zero = 0.0;
499 
500   PetscFunctionBegin;
501   ierr = VecSet(yy,zero);CHKERRQ(ierr);
502   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
503   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
504   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
510 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
517   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
518   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
519   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatGetDiagonal_MPIDense"
525 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
526 {
527   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
528   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
529   PetscErrorCode ierr;
530   PetscInt       len,i,n,m = A->rmap->n,radd;
531   PetscScalar    *x,zero = 0.0;
532 
533   PetscFunctionBegin;
534   ierr = VecSet(v,zero);CHKERRQ(ierr);
535   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
536   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
537   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
538   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
539   radd = A->rmap->rstart*m;
540   for (i=0; i<len; i++) {
541     x[i] = aloc->v[radd + i*m + i];
542   }
543   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatDestroy_MPIDense"
549 PetscErrorCode MatDestroy_MPIDense(Mat mat)
550 {
551   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555 #if defined(PETSC_USE_LOG)
556   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
557 #endif
558   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
559   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
560   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
561   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
562 
563   ierr = PetscFree(mat->data);CHKERRQ(ierr);
564   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
565 
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
568 #if defined(PETSC_HAVE_ELEMENTAL)
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
570 #endif
571   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
573   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/*
   Writes the parallel dense matrix to a binary viewer in dense format.
   Process 0 writes the header and its own rows, then receives each other
   process's local block in turn, transposes it to row-major file order,
   and writes it out.  Requires PETSC_VIEWER_NATIVE format.
*/
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* uniprocessor case: the local matrix is the whole matrix */
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        /* work is reused for every process's block, so size it by the largest
           local row count (including our own).
           NOTE(review): reading a->v sequentially below assumes contiguous
           storage, i.e. lda equals the local row count -- confirm. */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        /* transpose from column-major storage to row-major file order */
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          /* receive process k's local block, transpose it, append to the file */
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root processes just ship their local array to process 0 */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
657 
658 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
659 #include <petscdraw.h>
660 #undef __FUNCT__
661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663 {
664   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665   PetscErrorCode    ierr;
666   PetscMPIInt       rank = mdn->rank;
667   PetscViewerType   vtype;
668   PetscBool         iascii,isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
674   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
675   if (iascii) {
676     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
677     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
678     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679       MatInfo info;
680       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
681       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
682       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
683       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
684       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
685       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
686       PetscFunctionReturn(0);
687     } else if (format == PETSC_VIEWER_ASCII_INFO) {
688       PetscFunctionReturn(0);
689     }
690   } else if (isdraw) {
691     PetscDraw draw;
692     PetscBool isnull;
693 
694     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
695     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
696     if (isnull) PetscFunctionReturn(0);
697   }
698 
699   {
700     /* assemble the entire matrix onto first processor. */
701     Mat         A;
702     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
703     PetscInt    *cols;
704     PetscScalar *vals;
705 
706     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
707     if (!rank) {
708       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
709     } else {
710       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
711     }
712     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
713     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
714     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
715     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
716 
717     /* Copy the matrix ... This isn't the most efficient means,
718        but it's quick for now */
719     A->insertmode = INSERT_VALUES;
720 
721     row = mat->rmap->rstart;
722     m   = mdn->A->rmap->n;
723     for (i=0; i<m; i++) {
724       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
725       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
726       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
727       row++;
728     }
729 
730     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
731     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
732     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
733     if (!rank) {
734         ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
735     }
736     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
737     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
738     ierr = MatDestroy(&A);CHKERRQ(ierr);
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatView_MPIDense"
745 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
746 {
747   PetscErrorCode ierr;
748   PetscBool      iascii,isbinary,isdraw,issocket;
749 
750   PetscFunctionBegin;
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetInfo_MPIDense"
766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767 {
768   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769   Mat            mdn  = mat->A;
770   PetscErrorCode ierr;
771   PetscReal      isend[5],irecv[5];
772 
773   PetscFunctionBegin;
774   info->block_size = 1.0;
775 
776   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
777 
778   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779   isend[3] = info->memory;  isend[4] = info->mallocs;
780   if (flag == MAT_LOCAL) {
781     info->nz_used      = isend[0];
782     info->nz_allocated = isend[1];
783     info->nz_unneeded  = isend[2];
784     info->memory       = isend[3];
785     info->mallocs      = isend[4];
786   } else if (flag == MAT_GLOBAL_MAX) {
787     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
788 
789     info->nz_used      = irecv[0];
790     info->nz_allocated = irecv[1];
791     info->nz_unneeded  = irecv[2];
792     info->memory       = irecv[3];
793     info->mallocs      = irecv[4];
794   } else if (flag == MAT_GLOBAL_SUM) {
795     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
796 
797     info->nz_used      = irecv[0];
798     info->nz_allocated = irecv[1];
799     info->nz_unneeded  = irecv[2];
800     info->memory       = irecv[3];
801     info->mallocs      = irecv[4];
802   }
803   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
804   info->fill_ratio_needed = 0;
805   info->factor_mallocs    = 0;
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatSetOption_MPIDense"
811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
812 {
813   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   switch (op) {
818   case MAT_NEW_NONZERO_LOCATIONS:
819   case MAT_NEW_NONZERO_LOCATION_ERR:
820   case MAT_NEW_NONZERO_ALLOCATION_ERR:
821     MatCheckPreallocated(A,1);
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_ROW_ORIENTED:
825     MatCheckPreallocated(A,1);
826     a->roworiented = flg;
827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
828     break;
829   case MAT_NEW_DIAGONALS:
830   case MAT_KEEP_NONZERO_PATTERN:
831   case MAT_USE_HASH_TABLE:
832     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
833     break;
834   case MAT_IGNORE_OFF_PROC_ENTRIES:
835     a->donotstash = flg;
836     break;
837   case MAT_SYMMETRIC:
838   case MAT_STRUCTURALLY_SYMMETRIC:
839   case MAT_HERMITIAN:
840   case MAT_SYMMETRY_ETERNAL:
841   case MAT_IGNORE_LOWER_TRIANGULAR:
842     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
843     break;
844   default:
845     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatDiagonalScale_MPIDense"
853 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
854 {
855   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
856   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
857   PetscScalar    *l,*r,x,*v;
858   PetscErrorCode ierr;
859   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
860 
861   PetscFunctionBegin;
862   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
863   if (ll) {
864     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
865     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
866     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
867     for (i=0; i<m; i++) {
868       x = l[i];
869       v = mat->v + i;
870       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
871     }
872     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
873     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
874   }
875   if (rr) {
876     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
877     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
878     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
881     for (i=0; i<n; i++) {
882       x = r[i];
883       v = mat->v + i*m;
884       for (j=0; j<m; j++) (*v++) *= x;
885     }
886     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
887     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
888   }
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatNorm_MPIDense"
894 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
895 {
896   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
897   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
898   PetscErrorCode ierr;
899   PetscInt       i,j;
900   PetscReal      sum = 0.0;
901   PetscScalar    *v  = mat->v;
902 
903   PetscFunctionBegin;
904   if (mdn->size == 1) {
905     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
906   } else {
907     if (type == NORM_FROBENIUS) {
908       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
909         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
910       }
911       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
912       *nrm = PetscSqrtReal(*nrm);
913       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
914     } else if (type == NORM_1) {
915       PetscReal *tmp,*tmp2;
916       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
917       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
918       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
919       *nrm = 0.0;
920       v    = mat->v;
921       for (j=0; j<mdn->A->cmap->n; j++) {
922         for (i=0; i<mdn->A->rmap->n; i++) {
923           tmp[j] += PetscAbsScalar(*v);  v++;
924         }
925       }
926       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
927       for (j=0; j<A->cmap->N; j++) {
928         if (tmp2[j] > *nrm) *nrm = tmp2[j];
929       }
930       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
931       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
932     } else if (type == NORM_INFINITY) { /* max row norm */
933       PetscReal ntemp;
934       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
935       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
936     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
937   }
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatTranspose_MPIDense"
943 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
944 {
945   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
946   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
947   Mat            B;
948   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
949   PetscErrorCode ierr;
950   PetscInt       j,i;
951   PetscScalar    *v;
952 
953   PetscFunctionBegin;
954   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
955   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
956     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
957     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
958     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
959     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
960   } else {
961     B = *matout;
962   }
963 
964   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
965   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
966   for (i=0; i<m; i++) rwork[i] = rstart + i;
967   for (j=0; j<n; j++) {
968     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
969     v   += m;
970   }
971   ierr = PetscFree(rwork);CHKERRQ(ierr);
972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
974   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
975     *matout = B;
976   } else {
977     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 
983 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
984 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatSetUp_MPIDense"
988 PetscErrorCode MatSetUp_MPIDense(Mat A)
989 {
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatAXPY_MPIDense"
999 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1000 {
1001   PetscErrorCode ierr;
1002   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1003 
1004   PetscFunctionBegin;
1005   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1006   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatConjugate_MPIDense"
1012 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1013 {
1014   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1015   PetscErrorCode ierr;
1016 
1017   PetscFunctionBegin;
1018   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatRealPart_MPIDense"
1024 PetscErrorCode MatRealPart_MPIDense(Mat A)
1025 {
1026   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1027   PetscErrorCode ierr;
1028 
1029   PetscFunctionBegin;
1030   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1036 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1037 {
1038   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1039   PetscErrorCode ierr;
1040 
1041   PetscFunctionBegin;
1042   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1049 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1050 {
1051   PetscErrorCode ierr;
1052   PetscInt       i,n;
1053   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1054   PetscReal      *work;
1055 
1056   PetscFunctionBegin;
1057   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1058   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1059   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1060   if (type == NORM_2) {
1061     for (i=0; i<n; i++) work[i] *= work[i];
1062   }
1063   if (type == NORM_INFINITY) {
1064     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1065   } else {
1066     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1067   }
1068   ierr = PetscFree(work);CHKERRQ(ierr);
1069   if (type == NORM_2) {
1070     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1071   }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatSetRandom_MPIDense"
1077 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1078 {
1079   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1080   PetscErrorCode ierr;
1081   PetscScalar    *a;
1082   PetscInt       m,n,i;
1083 
1084   PetscFunctionBegin;
1085   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1086   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1087   for (i=0; i<m*n; i++) {
1088     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1089   }
1090   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1095 
#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPIDense"
/* A dense matrix stores every entry, so no diagonal entry can ever be
   "missing".  Note: the optional output d is never written here; callers
   must not rely on it when *missing is PETSC_FALSE. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1104 
1105 /* -------------------------------------------------------------------*/
1106 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1107                                         MatGetRow_MPIDense,
1108                                         MatRestoreRow_MPIDense,
1109                                         MatMult_MPIDense,
1110                                 /*  4*/ MatMultAdd_MPIDense,
1111                                         MatMultTranspose_MPIDense,
1112                                         MatMultTransposeAdd_MPIDense,
1113                                         0,
1114                                         0,
1115                                         0,
1116                                 /* 10*/ 0,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                         MatTranspose_MPIDense,
1121                                 /* 15*/ MatGetInfo_MPIDense,
1122                                         MatEqual_MPIDense,
1123                                         MatGetDiagonal_MPIDense,
1124                                         MatDiagonalScale_MPIDense,
1125                                         MatNorm_MPIDense,
1126                                 /* 20*/ MatAssemblyBegin_MPIDense,
1127                                         MatAssemblyEnd_MPIDense,
1128                                         MatSetOption_MPIDense,
1129                                         MatZeroEntries_MPIDense,
1130                                 /* 24*/ MatZeroRows_MPIDense,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                         0,
1135                                 /* 29*/ MatSetUp_MPIDense,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                 /* 34*/ MatDuplicate_MPIDense,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                 /* 39*/ MatAXPY_MPIDense,
1146                                         MatGetSubMatrices_MPIDense,
1147                                         0,
1148                                         MatGetValues_MPIDense,
1149                                         0,
1150                                 /* 44*/ 0,
1151                                         MatScale_MPIDense,
1152                                         MatShift_Basic,
1153                                         0,
1154                                         0,
1155                                 /* 49*/ MatSetRandom_MPIDense,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                         0,
1160                                 /* 54*/ 0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                 /* 59*/ MatGetSubMatrix_MPIDense,
1166                                         MatDestroy_MPIDense,
1167                                         MatView_MPIDense,
1168                                         0,
1169                                         0,
1170                                 /* 64*/ 0,
1171                                         0,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                 /* 69*/ 0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                 /* 74*/ 0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /* 79*/ 0,
1186                                         0,
1187                                         0,
1188                                         0,
1189                                 /* 83*/ MatLoad_MPIDense,
1190                                         0,
1191                                         0,
1192                                         0,
1193                                         0,
1194                                         0,
1195 #if defined(PETSC_HAVE_ELEMENTAL)
1196                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1197                                         MatMatMultSymbolic_MPIDense_MPIDense,
1198 #else
1199                                 /* 89*/ 0,
1200                                         0,
1201 #endif
1202                                         MatMatMultNumeric_MPIDense,
1203                                         0,
1204                                         0,
1205                                 /* 94*/ 0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                 /* 99*/ 0,
1211                                         0,
1212                                         0,
1213                                         MatConjugate_MPIDense,
1214                                         0,
1215                                 /*104*/ 0,
1216                                         MatRealPart_MPIDense,
1217                                         MatImaginaryPart_MPIDense,
1218                                         0,
1219                                         0,
1220                                 /*109*/ 0,
1221                                         0,
1222                                         0,
1223                                         0,
1224                                         MatMissingDiagonal_MPIDense,
1225                                 /*114*/ 0,
1226                                         0,
1227                                         0,
1228                                         0,
1229                                         0,
1230                                 /*119*/ 0,
1231                                         0,
1232                                         0,
1233                                         0,
1234                                         0,
1235                                 /*124*/ 0,
1236                                         MatGetColumnNorms_MPIDense,
1237                                         0,
1238                                         0,
1239                                         0,
1240                                 /*129*/ 0,
1241                                         MatTransposeMatMult_MPIDense_MPIDense,
1242                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1243                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1244                                         0,
1245                                 /*134*/ 0,
1246                                         0,
1247                                         0,
1248                                         0,
1249                                         0,
1250                                 /*139*/ 0,
1251                                         0,
1252                                         0
1253 };
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1257 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1258 {
1259   Mat_MPIDense   *a;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   mat->preallocated = PETSC_TRUE;
1264   /* Note:  For now, when data is specified above, this assumes the user correctly
1265    allocates the local dense storage space.  We should add error checking. */
1266 
1267   a       = (Mat_MPIDense*)mat->data;
1268   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1269   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1270   a->nvec = mat->cmap->n;
1271 
1272   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1273   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1274   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1275   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1276   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1277   PetscFunctionReturn(0);
1278 }
1279 
#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
/* Converts a MATMPIDENSE matrix to MATELEMENTAL by inserting the local rows
   (all N columns per local row) into a new or reused Elemental matrix. */
PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the caller's Elemental matrix; wipe its current values */
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* values below are supplied in column-major order */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* index arrays: all global columns, and the global indices of our rows */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place: A's header is replaced by the Elemental matrix */
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1322 
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/* Type constructor for MATMPIDENSE: installs the function table, initializes
   the per-process data structure, and composes the type-specific methods that
   are dispatched by string name (MatDenseGetArray, conversions, matmat
   products with MPIAIJ, ...).  Storage itself is allocated later by
   MatMPIDenseSetPreallocation. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  /* install the MATMPIDENSE operation table */
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* compose the string-dispatched methods for this type */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1366 
1367 /*MC
1368    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1369 
1370    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1371    and MATMPIDENSE otherwise.
1372 
1373    Options Database Keys:
1374 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1375 
1376   Level: beginner
1377 
1378 
.seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1380 M*/
1381 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to MatMPIDenseSetPreallocation_MPIDense when B is MATMPIDENSE;
     silently a no-op for other types (PetscTryMethod) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatCreateDense"
1419 /*@C
1420    MatCreateDense - Creates a parallel matrix in dense format.
1421 
1422    Collective on MPI_Comm
1423 
1424    Input Parameters:
1425 +  comm - MPI communicator
1426 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1427 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1428 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1429 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1430 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1431    to control all matrix memory allocation.
1432 
1433    Output Parameter:
1434 .  A - the matrix
1435 
1436    Notes:
1437    The dense format is fully compatible with standard Fortran 77
1438    storage by columns.
1439 
1440    The data input variable is intended primarily for Fortran programmers
1441    who wish to allocate their own matrix memory space.  Most users should
1442    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1443 
1444    The user MUST specify either the local or global matrix dimensions
1445    (possibly both).
1446 
1447    Level: intermediate
1448 
1449 .keywords: matrix,dense, parallel
1450 
1451 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1452 @*/
1453 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1454 {
1455   PetscErrorCode ierr;
1456   PetscMPIInt    size;
1457 
1458   PetscFunctionBegin;
1459   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1460   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1461   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1462   if (size > 1) {
1463     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1464     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1465     if (data) {  /* user provided data array, so no need to assemble */
1466       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1467       (*A)->assembled = PETSC_TRUE;
1468     }
1469   } else {
1470     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1471     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatDuplicate_MPIDense"
1478 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1479 {
1480   Mat            mat;
1481   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1482   PetscErrorCode ierr;
1483 
1484   PetscFunctionBegin;
1485   *newmat = 0;
1486   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1487   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1488   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1489   a       = (Mat_MPIDense*)mat->data;
1490   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1491 
1492   mat->factortype   = A->factortype;
1493   mat->assembled    = PETSC_TRUE;
1494   mat->preallocated = PETSC_TRUE;
1495 
1496   a->size         = oldmat->size;
1497   a->rank         = oldmat->rank;
1498   mat->insertmode = NOT_SET_VALUES;
1499   a->nvec         = oldmat->nvec;
1500   a->donotstash   = oldmat->donotstash;
1501 
1502   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1503   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1504 
1505   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1506   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1507   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1508 
1509   *newmat = mat;
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/*
   MatLoad_MPIDense_DenseInFile - Fills newmat from a binary file in which the
   matrix is stored as one dense block of values (MATRIX_BINARY_FORMAT_DENSE).

   Rank 0 owns the file descriptor: it reads its own slab of rows, then reads
   each remaining rank's slab in ownership order and ships it with
   MPIULong_Send(); every other rank only posts a matching MPIULong_Recv().
   The file stores values row-by-row while the local dense array is
   column-major, hence the transposing copy loops below.

   Input Parameters:
.  comm   - communicator of the matrix (rank 0 must hold fd)
.  fd     - binary file descriptor positioned at the start of the values
.  M, N   - global row/column dimensions taken from the file header
.  newmat - the MPIDENSE matrix being loaded (sizes may be partially set)
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns; keep any local sizes already set on newmat */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  /* preallocate unless the caller already supplied user-allocated storage */
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* rank 0 needs a staging buffer big enough for the largest slab it will read */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix row by row (file is row-major, array is column-major,
       which is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read each other rank's slab in ownership order and ship it out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix row by row (same row-major to column-major transpose as above) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatLoad_MPIDense"
/*
   MatLoad_MPIDense - Loads an MPIDENSE matrix from a PETSc binary viewer.

   The file begins with a 4-entry header (classid, M, N, nz).  If
   nz == MATRIX_BINARY_FORMAT_DENSE the values are stored as one dense block
   and loading is delegated to MatLoad_MPIDense_DenseInFile().  Otherwise the
   file is in the sparse (AIJ-style) binary format: rank 0 reads the row
   lengths, column indices and values and ships each rank its portion, and
   the entries are inserted with MatSetValues().

   Collective on the viewer's communicator; only rank 0 touches the file.
*/
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 reads the header; entries 1..3 are broadcast to everyone below */
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    /* default split: spread the remainder M % size over the lowest ranks */
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  /* rowners[i] = first global row owned by rank i (prefix sum of the gathered local sizes) */
  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array since it holds the row offsets */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* sizes are final now; preallocate unless user-provided storage is already in place */
  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix, one owned row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix, one owned row at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatEqual_MPIDense"
1767 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1768 {
1769   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1770   Mat            a,b;
1771   PetscBool      flg;
1772   PetscErrorCode ierr;
1773 
1774   PetscFunctionBegin;
1775   a    = matA->A;
1776   b    = matB->A;
1777   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1778   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
1784 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1785 {
1786   PetscErrorCode        ierr;
1787   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1788   Mat_TransMatMultDense *atb = a->atbdense;
1789 
1790   PetscFunctionBegin;
1791   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1792   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1793   ierr = PetscFree(atb);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
1799 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1800 {
1801   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1802   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1803   Mat_TransMatMultDense *atb = c->atbdense;
1804   PetscErrorCode ierr;
1805   MPI_Comm       comm;
1806   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1807   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1808   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1809   PetscScalar    _DOne=1.0,_DZero=0.0;
1810   PetscBLASInt   am,an,bn,aN;
1811   const PetscInt *ranges;
1812 
1813   PetscFunctionBegin;
1814   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1815   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1817 
1818   /* compute atbarray = aseq^T * bseq */
1819   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1820   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1821   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1822   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1823   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1824 
1825   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1826   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1827 
1828   /* arrange atbarray into sendbuf */
1829   k = 0;
1830   for (proc=0; proc<size; proc++) {
1831     for (j=0; j<cN; j++) {
1832       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1833     }
1834   }
1835   /* sum all atbarray to local values of C */
1836   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1837   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1838   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
1844 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1845 {
1846   PetscErrorCode        ierr;
1847   Mat                   Cdense;
1848   MPI_Comm              comm;
1849   PetscMPIInt           size;
1850   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1851   Mat_MPIDense          *c;
1852   Mat_TransMatMultDense *atb;
1853 
1854   PetscFunctionBegin;
1855   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1856   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1857     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1858   }
1859 
1860   /* create matrix product Cdense */
1861   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1862   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1863   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1864   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1865   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1866   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1867   *C   = Cdense;
1868 
1869   /* create data structure for reuse Cdense */
1870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1871   ierr = PetscNew(&atb);CHKERRQ(ierr);
1872   cM = Cdense->rmap->N;
1873   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1874 
1875   c                    = (Mat_MPIDense*)Cdense->data;
1876   c->atbdense          = atb;
1877   atb->destroy         = Cdense->ops->destroy;
1878   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
/* Computes C = A^T * B for two MPIDENSE matrices.  On MAT_INITIAL_MATRIX the
   symbolic phase first creates C and its reusable workspace; the numeric
   phase is performed on every call. */
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
1898 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1899 {
1900   PetscErrorCode   ierr;
1901   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1902   Mat_MatMultDense *ab = a->abdense;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1906   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1907   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1908 
1909   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1910   ierr = PetscFree(ab);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #if defined(PETSC_HAVE_ELEMENTAL)
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1917 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1918 {
1919   PetscErrorCode   ierr;
1920   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1921   Mat_MatMultDense *ab=c->abdense;
1922 
1923   PetscFunctionBegin;
1924   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1925   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1926   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1927   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1933 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1934 {
1935   PetscErrorCode   ierr;
1936   Mat              Ae,Be,Ce;
1937   Mat_MPIDense     *c;
1938   Mat_MatMultDense *ab;
1939 
1940   PetscFunctionBegin;
1941   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1942     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1943   }
1944 
1945   /* convert A and B to Elemental matrices Ae and Be */
1946   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1947   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1948 
1949   /* Ce = Ae*Be */
1950   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1951   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1952 
1953   /* convert Ce to C */
1954   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1955 
1956   /* create data structure for reuse Cdense */
1957   ierr = PetscNew(&ab);CHKERRQ(ierr);
1958   c                  = (Mat_MPIDense*)(*C)->data;
1959   c->abdense         = ab;
1960 
1961   ab->Ae             = Ae;
1962   ab->Be             = Be;
1963   ab->Ce             = Ce;
1964   ab->destroy        = (*C)->ops->destroy;
1965   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1966   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1972 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1973 {
1974   PetscErrorCode ierr;
1975 
1976   PetscFunctionBegin;
1977   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
1978     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1979     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1980     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1981   } else {
1982     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1983     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1984     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1985   }
1986   PetscFunctionReturn(0);
1987 }
1988 #endif
1989