xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision c094ef4021e955ef5f85f7d8a1bbc6ed64ba7621)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatDenseGetLocalMatrix"
13 /*@
14 
15       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
16               matrix that represents the operator. For sequential matrices it returns itself.
17 
18     Input Parameter:
19 .      A - the Seq or MPI dense matrix
20 
21     Output Parameter:
22 .      B - the inner matrix
23 
24     Level: intermediate
25 
26 @*/
27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
28 {
29   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
30   PetscErrorCode ierr;
31   PetscBool      flg;
32 
33   PetscFunctionBegin;
34   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
35   if (flg) *B = mat->A;
36   else *B = A;
37   PetscFunctionReturn(0);
38 }
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatGetRow_MPIDense"
42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
43 {
44   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
45   PetscErrorCode ierr;
46   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
47 
48   PetscFunctionBegin;
49   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
50   lrow = row - rstart;
51   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
52   PetscFunctionReturn(0);
53 }
54 
55 #undef __FUNCT__
56 #define __FUNCT__ "MatRestoreRow_MPIDense"
57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
58 {
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
63   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
64   PetscFunctionReturn(0);
65 }
66 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/* Return the diagonal block of A (the square sub-block whose rows and columns
   are both owned by this process) as a sequential dense matrix. The block is
   created lazily on first call and cached on A under the key "DiagonalBlock";
   subsequent calls return the cached object. The returned matrix shares its
   storage with A's local array (no copy is made). */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* Point B's storage at column rstart of the local array: the local array
       is column-major with m rows, so array+m*rstart is the first entry of
       the diagonal block. */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Composing bumps B's reference count, so the MatDestroy() below only
       drops our local reference; the cached copy keeps B alive until A is
       destroyed. */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/* Insert/add an m-by-n logically dense block of values. Rows owned locally
   are forwarded to the inner sequential matrix; off-process rows are cached
   in the matrix stash and communicated during assembly (unless stashing is
   disabled). Negative row/column indices are silently skipped. The layout of
   v[] depends on the matrix's row-oriented option. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* Row-oriented: row i of the input block is contiguous at v+i*n. */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* Column-oriented: entry (i,j) of the input block is at v[i+j*m],
           so the values for one row must be inserted one at a time. */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* Off-process row: queue it in the stash; assembly is required before
         the matrix can be used again. */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
134 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
/* Extract an m-by-n block of values into v[] (row-major: entry (i,j) lands
   at v[i*n+j]). Only locally owned rows are supported; requesting a remote
   row is an error. Negative indices are skipped, leaving the corresponding
   entries of v[] untouched. */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* Translate to the local row numbering of the inner sequential matrix. */
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatDenseGetArray_MPIDense"
161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
/* Extract the submatrix A(isrow,iscol) into *B. The column index set is
   gathered onto every process (each process holds all selected columns of
   its selected rows). NOTE(review): the row indices are assumed to be local
   to this process (irow[j] is offset by rstart without a range check) and
   the result keeps the caller-supplied local sizes -- no parallel
   redistribution is performed, as the comment below states. */
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  /* Every process needs the full list of selected columns. */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  /* Both storages are column-major; column icol[i] of the source starts at
     v + lda*icol[i], and the destination is filled contiguously. */
  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238 {
239   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
/* Begin assembly: agree on a global insert mode across the communicator and
   start scattering the stashed off-process values to their owners. Collective
   on the matrix's communicator. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* The bitwise OR of the per-process modes reveals a mix: a process that set
     no values contributes NOT_SET_VALUES (zero) and is harmless. */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* Kick off the communication of stashed entries; completed in AssemblyEnd. */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
269 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/* Finish assembly: drain the incoming stash messages, insert the received
   values into the local matrix, assemble the inner sequential matrix, and
   (on first final assembly) build the vector scatter used by MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    /* Each call returns one message (n triples); flg==0 means all received. */
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* The scatter context (lvec/Mvctx) is only needed once and is set up on
     the first final assembly. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row owerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/* Zero the (globally numbered) rows listed in rows[], optionally placing
   diag on the diagonal and fixing the right-hand side b so that the zeroed
   unknowns keep the values given in x. Rows may be listed by any process;
   a hand-rolled MPI exchange routes each row index to its owner. Collective.
   Restricted to square matrices with identical row/column ownership (see the
   note above). */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  /* sizes[2*j] counts rows destined for process j; sizes[2*j+1] is a 0/1
     flag marking that at least one message goes to j. */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  /* Bucket the row indices by destination (counting sort by owner). */
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* The loop above advanced starts[]; rebuild the offsets for the sends. */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* Collect all received global row indices, converted to local numbering. */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    /* Column-major storage: entry (r,c) lives at v[r + lda*c]. The diagonal
       of local row lrows[i] sits in global column cmap->rstart + lrows[i]
       (valid because row and column ownership are identical, checked above). */
    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
463 
464 #undef __FUNCT__
465 #define __FUNCT__ "MatMult_MPIDense"
466 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
467 {
468   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
469   PetscErrorCode ierr;
470 
471   PetscFunctionBegin;
472   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatMultAdd_MPIDense"
480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
483   PetscErrorCode ierr;
484 
485   PetscFunctionBegin;
486   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
487   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 #undef __FUNCT__
493 #define __FUNCT__ "MatMultTranspose_MPIDense"
494 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
495 {
496   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
497   PetscErrorCode ierr;
498   PetscScalar    zero = 0.0;
499 
500   PetscFunctionBegin;
501   ierr = VecSet(yy,zero);CHKERRQ(ierr);
502   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
503   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
504   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
510 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
517   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
518   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
519   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/* Copy the diagonal of A into v. Each process fills its locally owned
   entries; the rest are left at zero by the initial VecSet. NOTE(review):
   the index arithmetic uses m (local row count) as the column stride, i.e.
   it assumes the local array is packed with lda == m -- confirm against
   MatSeqDense preallocation before changing storage. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  /* Number of diagonal entries this process owns. */
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* Offset of global column rstart in the column-major local array. */
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
546 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/* Destructor: free the stash, the inner sequential matrix, the scatter
   work objects, and the type-specific data, then detach every composed
   method so the Mat shell can be safely retyped or freed. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* Composing NULL removes each registered method from the object. */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
581 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/* Write the matrix to a binary viewer in PETSc's dense format. Only the
   PETSC_VIEWER_NATIVE format is supported in parallel: rank 0 writes the
   header and its own rows, then receives each remaining process's local
   block in turn (column-major), transposes it into row-major order in a
   scratch buffer, and appends it to the file. */
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* Single process: the sequential viewer handles everything. */
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        /* The local array is column-major; reorder into row-major work[]
           so the file contains the rows contiguously. */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        /* Receive each remote block in rank order, transpose, and append. */
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* Non-root ranks just ship their raw local array to rank 0. */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
657 
658 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
659 #include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
/* View the matrix on an ASCII, draw, or socket viewer. Info formats print
   per-process statistics and return early; otherwise the whole matrix is
   gathered onto rank 0 (via a temporary MPIDENSE matrix whose rows all live
   on rank 0) and viewed there with the sequential viewer. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Print one line of local statistics per process, in rank order. */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* Nothing to do for a null draw context. */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* Each process re-inserts its local rows into A; since A's rows all
       live on rank 0, the values travel through the stash at assembly. */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
        ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatView_MPIDense"
745 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
746 {
747   PetscErrorCode ierr;
748   PetscBool      iascii,isbinary,isdraw,issocket;
749 
750   PetscFunctionBegin;
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   }
761   PetscFunctionReturn(0);
762 }
763 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
/* Gather matrix statistics. Local statistics come from the inner sequential
   matrix; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the five scalar fields are
   reduced across the communicator with MAX / SUM respectively. Factorization
   fields are zeroed since no parallel dense factorization is supported. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  /* Pack the five reducible fields into a contiguous buffer. */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatSetOption_MPIDense"
811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
812 {
813   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   switch (op) {
818   case MAT_NEW_NONZERO_LOCATIONS:
819   case MAT_NEW_NONZERO_LOCATION_ERR:
820   case MAT_NEW_NONZERO_ALLOCATION_ERR:
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_ROW_ORIENTED:
824     a->roworiented = flg;
825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
826     break;
827   case MAT_NEW_DIAGONALS:
828   case MAT_KEEP_NONZERO_PATTERN:
829   case MAT_USE_HASH_TABLE:
830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
831     break;
832   case MAT_IGNORE_OFF_PROC_ENTRIES:
833     a->donotstash = flg;
834     break;
835   case MAT_SYMMETRIC:
836   case MAT_STRUCTURALLY_SYMMETRIC:
837   case MAT_HERMITIAN:
838   case MAT_SYMMETRY_ETERNAL:
839   case MAT_IGNORE_LOWER_TRIANGULAR:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) (*v++) *= x;
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
/*
   Computes a norm of the parallel dense matrix A.

   Supported types: NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); NORM_2 is rejected.  On a single process the computation is
   delegated entirely to the sequential implementation.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local block, reduce, then take the square root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* each rank accumulates the |a_ij| column sums of its local block
         (the local block spans all global columns, see the preallocation),
         the sums are reduced, then the maximum over columns is taken */
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so the local row-norm maximum
         just needs a global max reduction */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
938 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
/*
   Forms the transpose of A.

   MAT_INITIAL_MATRIX: a new matrix is created and returned in *matout.
   MAT_REUSE_MATRIX with *matout == A (in-place): supported only for square
   matrices; a temporary is built and then header-merged back into A.
   The transposed entries generally belong to other processes, so they are
   communicated through MatSetValues/MatAssembly.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    /* build a fresh matrix with the transposed layout (cols of A become rows) */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* column j of the local block (a global column of A) becomes global row j
     of B; rwork holds the global indices of the rows owned here, which are
     the column indices of those entries in B */
  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place request: move B's contents into A's header and destroy B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
979 
980 
981 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
982 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatSetUp_MPIDense"
986 PetscErrorCode MatSetUp_MPIDense(Mat A)
987 {
988   PetscErrorCode ierr;
989 
990   PetscFunctionBegin;
991   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "MatAXPY_MPIDense"
997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998 {
999   PetscErrorCode ierr;
1000   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1001 
1002   PetscFunctionBegin;
1003   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1004   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatConjugate_MPIDense"
1010 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1011 {
1012   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1013   PetscErrorCode ierr;
1014 
1015   PetscFunctionBegin;
1016   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatRealPart_MPIDense"
1022 PetscErrorCode MatRealPart_MPIDense(Mat A)
1023 {
1024   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1025   PetscErrorCode ierr;
1026 
1027   PetscFunctionBegin;
1028   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035 {
1036   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1037   PetscErrorCode ierr;
1038 
1039   PetscFunctionBegin;
1040   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1047 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1048 {
1049   PetscErrorCode ierr;
1050   PetscInt       i,n;
1051   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1052   PetscReal      *work;
1053 
1054   PetscFunctionBegin;
1055   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1056   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1057   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1058   if (type == NORM_2) {
1059     for (i=0; i<n; i++) work[i] *= work[i];
1060   }
1061   if (type == NORM_INFINITY) {
1062     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1063   } else {
1064     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1065   }
1066   ierr = PetscFree(work);CHKERRQ(ierr);
1067   if (type == NORM_2) {
1068     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1069   }
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatSetRandom_MPIDense"
1075 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1076 {
1077   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1078   PetscErrorCode ierr;
1079   PetscScalar    *a;
1080   PetscInt       m,n,i;
1081 
1082   PetscFunctionBegin;
1083   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1084   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1085   for (i=0; i<m*n; i++) {
1086     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1087   }
1088   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1093 
#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPIDense"
/*
   A dense matrix stores every entry explicitly, so no diagonal entry can be
   structurally missing.
   NOTE(review): the location output d is never written; callers must not
   read *d when *missing is PETSC_FALSE -- confirm that matches the MatOps
   contract for this slot.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1102 
/* -------------------------------------------------------------------*/
/*
   Function table for MATMPIDENSE.  Slot numbers in the comments follow the
   ordering of struct _MatOps in the private Mat header; a 0 entry means the
   operation is not implemented for this type.
*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};
1252 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
/*
   Implementation of MatMPIDenseSetPreallocation() for MATMPIDENSE.

   Creates the local storage: a SEQDENSE matrix of size (local rows) x
   (GLOBAL columns).  When data is non-NULL it is used directly as the
   storage for the local block (user retains ownership).

   NOTE(review): an already-existing a->A is not destroyed before the new
   MatCreate, so calling this twice would appear to leak the first local
   matrix -- confirm callers only invoke it once per matrix.
*/
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* the local block carries every global column of the locally owned rows */
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1277 
#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
/*
   Converts a MATMPIDENSE matrix to MATELEMENTAL.

   With MAT_REUSE_MATRIX the existing target matrix is zeroed and refilled;
   otherwise a new Elemental matrix is created.  The local values are moved
   in one MatSetValues call over the owned rows and all global columns.
   With MAT_INPLACE_MATRIX the new matrix replaces A's header.
*/
PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* global indices: the locally owned rows, and all N columns */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1320 
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   Type constructor for MATMPIDENSE: installs the function table, creates the
   stash used to communicate off-process MatSetValues entries, and registers
   the composed methods (array access, preallocation, conversion to
   Elemental, and the MPIAIJ*MPIDense product kernels).
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  /* install the MPIDense function table */
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* composed methods dispatched by name from the type-independent API */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1364 
1365 /*MC
1366    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1367 
1368    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1369    and MATMPIDENSE otherwise.
1370 
1371    Options Database Keys:
1372 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1373 
1374   Level: beginner
1375 
1376 
.seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1378 M*/
1379 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (no-op for other types) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatCreateDense"
1417 /*@C
1418    MatCreateDense - Creates a parallel matrix in dense format.
1419 
1420    Collective on MPI_Comm
1421 
1422    Input Parameters:
1423 +  comm - MPI communicator
1424 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1425 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1426 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1427 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1428 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1429    to control all matrix memory allocation.
1430 
1431    Output Parameter:
1432 .  A - the matrix
1433 
1434    Notes:
1435    The dense format is fully compatible with standard Fortran 77
1436    storage by columns.
1437 
1438    The data input variable is intended primarily for Fortran programmers
1439    who wish to allocate their own matrix memory space.  Most users should
1440    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1441 
1442    The user MUST specify either the local or global matrix dimensions
1443    (possibly both).
1444 
1445    Level: intermediate
1446 
1447 .keywords: matrix,dense, parallel
1448 
1449 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1450 @*/
1451 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1452 {
1453   PetscErrorCode ierr;
1454   PetscMPIInt    size;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1458   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1459   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1460   if (size > 1) {
1461     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1462     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1463     if (data) {  /* user provided data array, so no need to assemble */
1464       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1465       (*A)->assembled = PETSC_TRUE;
1466     }
1467   } else {
1468     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1469     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
/*
   Duplicates an MPIDENSE matrix.  The row/column layout objects are shared
   by reference; the local dense block is duplicated (copying values iff
   cpvalues requests it) and the scatter context for MatMult is rebuilt.
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  /* copy A's (possibly customized) function table onto the new matrix */
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  /* share layouts by reference instead of rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
1510 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/*
   MatLoad_MPIDense_DenseInFile - loads the numerical values of an MPI dense
   matrix stored in the binary file in dense (row-major, row-by-row) format.

   Rank 0 reads the whole file sequentially: first its own slab of rows, then
   each remote process's slab, which it ships out with MPIULong_Send. Every
   process then transposes its slab from the file's row-major order into the
   column-major storage of its local SeqDense matrix.

   Collective on comm; the file descriptor fd is read only on rank 0.
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns; honor local sizes already set on newmat */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  /* preallocate only if the caller has not already supplied its own storage */
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* rank 0's read buffer must hold the largest slab of any process */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix row by row: the file is row-major but the local
       array is column-major, which is why we cannot read directly into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processes' slabs and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values for this process's m*N slab */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix row by row (same row-major to column-major transpose as above) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1579 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
/*
   MatLoad_MPIDense - loads an MPI dense matrix from a PETSc binary viewer.

   The file can hold the matrix in one of two layouts:
   .  dense format (header nz == MATRIX_BINARY_FORMAT_DENSE), delegated to
      MatLoad_MPIDense_DenseInFile;
   .  sparse (AIJ-style) format: rank 0 reads the row lengths, column indices
      and values, ships each process its portion with MPI point-to-point
      messages, and every process inserts its rows with MatSetValues.

   Collective on the viewer's communicator; only rank 0 touches the file.
*/
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* rank 0 reads and validates the 4-integer header: classid, M, N, nz */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  /* broadcast M, N, nz (the classid is only checked on rank 0) */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows (default: near-even split of M over size) */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  /* build rowners = prefix sum of local row counts, so rowners[p] is the
     first global row owned by process p */
  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array for the scatter */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* send buffer arguments are ignored on non-root ranks */
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices; verify we got exactly nz of them */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  /* preallocate only if the caller has not already supplied its own storage */
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix: rows are packed back-to-back, ourlens[i] entries each */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values; verify we got exactly nz of them */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix: rows are packed back-to-back, ourlens[i] entries each */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatEqual_MPIDense"
1765 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1766 {
1767   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1768   Mat            a,b;
1769   PetscBool      flg;
1770   PetscErrorCode ierr;
1771 
1772   PetscFunctionBegin;
1773   a    = matA->A;
1774   b    = matB->A;
1775   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1776   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
1782 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1783 {
1784   PetscErrorCode        ierr;
1785   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1786   Mat_TransMatMultDense *atb = a->atbdense;
1787 
1788   PetscFunctionBegin;
1789   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1790   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1791   ierr = PetscFree(atb);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
1797 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1798 {
1799   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1800   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1801   Mat_TransMatMultDense *atb = c->atbdense;
1802   PetscErrorCode ierr;
1803   MPI_Comm       comm;
1804   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1805   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1806   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1807   PetscScalar    _DOne=1.0,_DZero=0.0;
1808   PetscBLASInt   am,an,bn,aN;
1809   const PetscInt *ranges;
1810 
1811   PetscFunctionBegin;
1812   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1813   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1814   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1815 
1816   /* compute atbarray = aseq^T * bseq */
1817   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1818   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1819   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1820   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1821   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1822 
1823   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1824   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1825 
1826   /* arrange atbarray into sendbuf */
1827   k = 0;
1828   for (proc=0; proc<size; proc++) {
1829     for (j=0; j<cN; j++) {
1830       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1831     }
1832   }
1833   /* sum all atbarray to local values of C */
1834   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1835   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1836   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
1842 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1843 {
1844   PetscErrorCode        ierr;
1845   Mat                   Cdense;
1846   MPI_Comm              comm;
1847   PetscMPIInt           size;
1848   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1849   Mat_MPIDense          *c;
1850   Mat_TransMatMultDense *atb;
1851 
1852   PetscFunctionBegin;
1853   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1854   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1855     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1856   }
1857 
1858   /* create matrix product Cdense */
1859   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1860   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1861   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1862   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1863   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1864   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1865   *C   = Cdense;
1866 
1867   /* create data structure for reuse Cdense */
1868   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1869   ierr = PetscNew(&atb);CHKERRQ(ierr);
1870   cM = Cdense->rmap->N;
1871   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1872 
1873   c                    = (Mat_MPIDense*)Cdense->data;
1874   c->atbdense          = atb;
1875   atb->destroy         = Cdense->ops->destroy;
1876   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
1882 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1883 {
1884   PetscErrorCode ierr;
1885 
1886   PetscFunctionBegin;
1887   if (scall == MAT_INITIAL_MATRIX) {
1888     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1889   }
1890   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
1896 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1897 {
1898   PetscErrorCode   ierr;
1899   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1900   Mat_MatMultDense *ab = a->abdense;
1901 
1902   PetscFunctionBegin;
1903   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1904   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1905   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1906 
1907   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1908   ierr = PetscFree(ab);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #if defined(PETSC_HAVE_ELEMENTAL)
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1915 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1916 {
1917   PetscErrorCode   ierr;
1918   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1919   Mat_MatMultDense *ab=c->abdense;
1920 
1921   PetscFunctionBegin;
1922   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1923   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1924   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1925   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1931 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1932 {
1933   PetscErrorCode   ierr;
1934   Mat              Ae,Be,Ce;
1935   Mat_MPIDense     *c;
1936   Mat_MatMultDense *ab;
1937 
1938   PetscFunctionBegin;
1939   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1940     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1941   }
1942 
1943   /* convert A and B to Elemental matrices Ae and Be */
1944   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1945   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1946 
1947   /* Ce = Ae*Be */
1948   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1949   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1950 
1951   /* convert Ce to C */
1952   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1953 
1954   /* create data structure for reuse Cdense */
1955   ierr = PetscNew(&ab);CHKERRQ(ierr);
1956   c                  = (Mat_MPIDense*)(*C)->data;
1957   c->abdense         = ab;
1958 
1959   ab->Ae             = Ae;
1960   ab->Be             = Be;
1961   ab->Ce             = Ce;
1962   ab->destroy        = (*C)->ops->destroy;
1963   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1964   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1970 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1971 {
1972   PetscErrorCode ierr;
1973 
1974   PetscFunctionBegin;
1975   if (scall == MAT_INITIAL_MATRIX) { /* simbolic product includes numeric product */
1976     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1977     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1978     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1979   } else {
1980     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1981     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1982     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 #endif
1987