xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4fc747eaadbeca11629f314a99edccbc2ed7b3d3)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatDenseGetLocalMatrix"
13 /*@
14       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix
15               that stores this process's rows; for a MATSEQDENSE matrix, returns the
16               matrix itself.
17 
18     Input Parameter:
19 .      A - the sequential or MPI dense matrix
20 
21     Output Parameter:
22 .      B - the inner sequential matrix
23 
24     Level: intermediate
25 
26 @*/
27 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
28 {
29   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
30   PetscErrorCode ierr;
31   PetscBool      flg;
32 
33   PetscFunctionBegin;
34   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
35   if (flg) *B = mat->A;
36   else *B = A;
37   PetscFunctionReturn(0);
38 }
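/*
   A minimal usage sketch (illustrative, not part of this file): grab the local
   MATSEQDENSE block of a parallel dense matrix and operate on it directly. The
   returned matrix is borrowed (no reference is taken), so it must not be destroyed.

     Mat A,lA;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&lA);CHKERRQ(ierr);
     ierr = MatScale(lA,2.0);CHKERRQ(ierr);      (scales only this process's rows)
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/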
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "MatGetRow_MPIDense"
42 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
43 {
44   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
45   PetscErrorCode ierr;
46   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
47 
48   PetscFunctionBegin;
49   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
50   lrow = row - rstart;
51   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
52   PetscFunctionReturn(0);
53 }
54 
55 #undef __FUNCT__
56 #define __FUNCT__ "MatRestoreRow_MPIDense"
57 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
58 {
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
63   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
69 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
70 {
71   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
72   PetscErrorCode ierr;
73   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
74   PetscScalar    *array;
75   MPI_Comm       comm;
76   Mat            B;
77 
78   PetscFunctionBegin;
79   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
80 
81   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
82   if (!B) {
83     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
84     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
85     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
86     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
87     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
88     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
89     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
90     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
92     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
93     *a   = B;
94     ierr = MatDestroy(&B);CHKERRQ(ierr);
95   } else *a = B;
96   PetscFunctionReturn(0);
97 }
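/*
   Usage sketch (illustrative): the diagonal block returned above aliases the
   storage of the parallel matrix and is cached on A under the name
   "DiagonalBlock", so repeated calls are cheap and the caller must not destroy it.

     Mat Ad;
     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
     (Ad is the m x m sequential block of this process's rows and columns)
*/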
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSetValues_MPIDense"
101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102 {
103   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106   PetscBool      roworiented = A->roworiented;
107 
108   PetscFunctionBegin;
109   for (i=0; i<m; i++) {
110     if (idxm[i] < 0) continue;
111     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112     if (idxm[i] >= rstart && idxm[i] < rend) {
113       row = idxm[i] - rstart;
114       if (roworiented) {
115         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
116       } else {
117         for (j=0; j<n; j++) {
118           if (idxn[j] < 0) continue;
119           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
121         }
122       }
123     } else if (!A->donotstash) {
124       mat->assembled = PETSC_FALSE;
125       if (roworiented) {
126         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
127       } else {
128         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
129       }
130     }
131   }
132   PetscFunctionReturn(0);
133 }
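/*
   Sketch of the calling pattern the stash above supports (names illustrative):
   entries destined for rows owned by other processes are cached locally and
   communicated during assembly, so any process may set any global entry.

     ierr = MatSetValue(A,row,col,val,INSERT_VALUES);CHKERRQ(ierr);   (row may be off-process)
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/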
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatGetValues_MPIDense"
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatDenseGetArray_MPIDense"
161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174 {
175   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
176   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
177   PetscErrorCode ierr;
178   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179   const PetscInt *irow,*icol;
180   PetscScalar    *av,*bv,*v = lmat->v;
181   Mat            newmat;
182   IS             iscol_local;
183 
184   PetscFunctionBegin;
185   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
186   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
187   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
188   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
190   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
191 
192   /* No parallel redistribution currently supported! Should really check each index set
193      to confirm that it is OK. Currently only submatrices with the same row partitioning
194      as the original matrix are supported! */
195 
196   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
197   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
198 
199   /* Check submatrix call */
200   if (scall == MAT_REUSE_MATRIX) {
201     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202     /* Really need to test rows and column sizes! */
203     newmat = *B;
204   } else {
205     /* Create and fill new matrix */
206     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
207     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
208     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
209     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
210   }
211 
212   /* Now extract the data pointers and do the copy, column at a time */
213   newmatd = (Mat_MPIDense*)newmat->data;
214   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
215 
216   for (i=0; i<Ncols; i++) {
217     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218     for (j=0; j<nrows; j++) {
219       *bv++ = av[irow[j] - rstart];
220     }
221   }
222 
223   /* Assemble the matrices so that the correct flags are set */
224   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226 
227   /* Free work space */
228   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
229   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
230   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
231   *B   = newmat;
232   PetscFunctionReturn(0);
233 }
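/*
   Usage sketch (illustrative): since no redistribution is implemented above,
   isrow must select only locally owned rows and preserve the original row
   partitioning; iscol is gathered onto every process internally.

     IS  isrow,iscol;
     Mat B;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/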
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238 {
239   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
259   /* make sure all processes use the same insert mode (INSERT_VALUES or ADD_VALUES) */
260   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273 {
274   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275   PetscErrorCode ierr;
276   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277   PetscMPIInt    n;
278   PetscScalar    *val;
279 
280   PetscFunctionBegin;
281   /*  wait on receives */
282   while (1) {
283     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
284     if (!flg) break;
285 
286     for (i=0; i<n;) {
287       /* Now identify the consecutive vals belonging to the same row */
288       for (j=i,rstart=row[j]; j<n; j++) {
289         if (row[j] != rstart) break;
290       }
291       if (j < n) ncols = j-i;
292       else       ncols = n-i;
293       /* Now assemble all these values with a single function call */
294       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
295       i    = j;
296     }
297   }
298   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
299 
300   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
301   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
302 
303   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 /* the code does not handle the diagonal entries correctly unless the
322    matrix is square and the column and row ownerships are identical.
323    This is a BUG. The only way to fix it seems to be to access
324    mdn->A directly and not go through the MatZeroRows()
325    routine.
326 */
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroRows_MPIDense"
329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
330 {
331   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
332   PetscErrorCode    ierr;
333   PetscInt          i,*owners = A->rmap->range;
334   PetscInt          *sizes,j,idx,nsends;
335   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
336   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337   PetscInt          *lens,*lrows,*values;
338   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
339   MPI_Comm          comm;
340   MPI_Request       *send_waits,*recv_waits;
341   MPI_Status        recv_status,*send_status;
342   PetscBool         found;
343   const PetscScalar *xx;
344   PetscScalar       *bb;
345 
346   PetscFunctionBegin;
347   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
348   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
349   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350   /*  first count number of contributors to each processor */
351   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
352   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
353   for (i=0; i<N; i++) {
354     idx   = rows[i];
355     found = PETSC_FALSE;
356     for (j=0; j<size; j++) {
357       if (idx >= owners[j] && idx < owners[j+1]) {
358         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
359       }
360     }
361     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
362   }
363   nsends = 0;
364   for (i=0; i<size; i++) nsends += sizes[2*i+1];
365 
366   /* inform other processors of number of messages and max length*/
367   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
368 
369   /* post receives:   */
370   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
371   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
372   for (i=0; i<nrecvs; i++) {
373     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
374   }
375 
376   /* do sends:
377       1) starts[i] gives the starting index in svalues for stuff going to
378          the ith processor
379   */
380   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
381   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
382   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
383 
384   starts[0] = 0;
385   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
386   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
387 
388   starts[0] = 0;
389   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
390   count = 0;
391   for (i=0; i<size; i++) {
392     if (sizes[2*i+1]) {
393       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
394     }
395   }
396   ierr = PetscFree(starts);CHKERRQ(ierr);
397 
398   base = owners[rank];
399 
400   /*  wait on receives */
401   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
402   count = nrecvs;
403   slen  = 0;
404   while (count) {
405     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
406     /* unpack receives into our local space */
407     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
408 
409     source[imdex] = recv_status.MPI_SOURCE;
410     lens[imdex]   = n;
411     slen += n;
412     count--;
413   }
414   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
415 
416   /* move the data into the send scatter */
417   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
418   count = 0;
419   for (i=0; i<nrecvs; i++) {
420     values = rvalues + i*nmax;
421     for (j=0; j<lens[i]; j++) {
422       lrows[count++] = values[j] - base;
423     }
424   }
425   ierr = PetscFree(rvalues);CHKERRQ(ierr);
426   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
427   ierr = PetscFree(owner);CHKERRQ(ierr);
428   ierr = PetscFree(sizes);CHKERRQ(ierr);
429 
430   /* fix right hand side if needed */
431   if (x && b) {
432     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
433     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
434     for (i=0; i<slen; i++) {
435       bb[lrows[i]] = diag*xx[lrows[i]];
436     }
437     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
439   }
440 
441   /* actually zap the local rows */
442   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
443   if (diag != 0.0) {
444     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
445     PetscInt     m   = ll->lda, i;
446 
447     for (i=0; i<slen; i++) {
448       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag; /* entry (lrows[i], cstart+lrows[i]) in the column-major local array */
449     }
450   }
451   ierr = PetscFree(lrows);CHKERRQ(ierr);
452 
453   /* wait on sends */
454   if (nsends) {
455     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
456     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
457     ierr = PetscFree(send_status);CHKERRQ(ierr);
458   }
459   ierr = PetscFree(send_waits);CHKERRQ(ierr);
460   ierr = PetscFree(svalues);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
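/*
   Usage sketch (illustrative): each process may list any global rows; the
   rendezvous above routes them to their owners. When x and b are supplied, the
   right-hand side is fixed up as b[row] = diag*x[row] for every zeroed row.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/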
463 
464 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
465 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
466 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
467 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatMult_MPIDense"
471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
472 {
473   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
477   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatMultAdd_MPIDense"
485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
488   PetscErrorCode ierr;
489 
490   PetscFunctionBegin;
491   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatMultTranspose_MPIDense"
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502   PetscErrorCode ierr;
503   PetscScalar    zero = 0.0;
504 
505   PetscFunctionBegin;
506   ierr = VecSet(yy,zero);CHKERRQ(ierr);
507   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
508   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
516 {
517   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
518   PetscErrorCode ierr;
519 
520   PetscFunctionBegin;
521   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
522   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
523   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNCT__
529 #define __FUNCT__ "MatGetDiagonal_MPIDense"
530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
531 {
532   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
533   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
534   PetscErrorCode ierr;
535   PetscInt       len,i,n,m = A->rmap->n,radd;
536   PetscScalar    *x,zero = 0.0;
537 
538   PetscFunctionBegin;
539   ierr = VecSet(v,zero);CHKERRQ(ierr);
540   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
541   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
542   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
543   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
544   radd = A->rmap->rstart*m; /* column-major offset of this process's first diagonal entry (assumes lda == m) */
545   for (i=0; i<len; i++) {
546     x[i] = aloc->v[radd + i*m + i];
547   }
548   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatDestroy_MPIDense"
554 PetscErrorCode MatDestroy_MPIDense(Mat mat)
555 {
556   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
557   PetscErrorCode ierr;
558 
559   PetscFunctionBegin;
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
565   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
566   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
567 
568   ierr = PetscFree(mat->data);CHKERRQ(ierr);
569   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
570 
571   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
573 #if defined(PETSC_HAVE_ELEMENTAL)
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
575 #endif
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
580   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
581   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
582   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatView_MPIDense_Binary"
589 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
590 {
591   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
592   PetscErrorCode    ierr;
593   PetscViewerFormat format;
594   int               fd;
595   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
596   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
597   PetscScalar       *work,*v,*vv;
598   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
599 
600   PetscFunctionBegin;
601   if (mdn->size == 1) {
602     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
603   } else {
604     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
605     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
606     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
607 
608     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
609     if (format == PETSC_VIEWER_NATIVE) {
610 
611       if (!rank) {
612         /* store the matrix as a dense matrix */
613         header[0] = MAT_FILE_CLASSID;
614         header[1] = mat->rmap->N;
615         header[2] = N;
616         header[3] = MATRIX_BINARY_FORMAT_DENSE;
617         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
618 
619         /* get largest work array needed for transposing array */
620         mmax = mat->rmap->n;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
625 
626         /* write out local array, by rows */
627         m = mat->rmap->n;
628         v = a->v;
629         for (j=0; j<N; j++) {
630           for (i=0; i<m; i++) {
631             work[j + i*N] = *v++;
632           }
633         }
634         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
635         /* get largest work array to receive messages from other processes, excludes process zero */
636         mmax = 0;
637         for (i=1; i<size; i++) {
638           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
639         }
640         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
641         for (k = 1; k < size; k++) {
642           v    = vv;
643           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
644           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
645 
646           for (j = 0; j < N; j++) {
647             for (i = 0; i < m; i++) {
648               work[j + i*N] = *v++;
649             }
650           }
651           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
652         }
653         ierr = PetscFree(work);CHKERRQ(ierr);
654         ierr = PetscFree(vv);CHKERRQ(ierr);
655       } else {
656         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657       }
658     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
659   }
660   PetscFunctionReturn(0);
661 }
662 
663 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
664 #include <petscdraw.h>
665 #undef __FUNCT__
666 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
667 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
668 {
669   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
670   PetscErrorCode    ierr;
671   PetscMPIInt       rank = mdn->rank;
672   PetscViewerType   vtype;
673   PetscBool         iascii,isdraw;
674   PetscViewer       sviewer;
675   PetscViewerFormat format;
676 
677   PetscFunctionBegin;
678   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
679   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
680   if (iascii) {
681     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
686       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
688       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
689       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
690       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
691       PetscFunctionReturn(0);
692     } else if (format == PETSC_VIEWER_ASCII_INFO) {
693       PetscFunctionReturn(0);
694     }
695   } else if (isdraw) {
696     PetscDraw draw;
697     PetscBool isnull;
698 
699     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
700     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
701     if (isnull) PetscFunctionReturn(0);
702   }
703 
704   {
705     /* assemble the entire matrix onto first processor. */
706     Mat         A;
707     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
708     PetscInt    *cols;
709     PetscScalar *vals;
710 
711     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
712     if (!rank) {
713       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
714     } else {
715       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
716     }
717     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
718     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
719     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
720     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
721 
722     /* Copy the matrix ... This isn't the most efficient means,
723        but it's quick for now */
724     A->insertmode = INSERT_VALUES;
725 
726     row = mat->rmap->rstart;
727     m   = mdn->A->rmap->n;
728     for (i=0; i<m; i++) {
729       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
730       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
731       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
732       row++;
733     }
734 
735     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
738     if (!rank) {
739         ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
740     }
741     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
742     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
743     ierr = MatDestroy(&A);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatView_MPIDense"
750 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
751 {
752   PetscErrorCode ierr;
753   PetscBool      iascii,isbinary,isdraw,issocket;
754 
755   PetscFunctionBegin;
756   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
757   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
758   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
759   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
760 
761   if (iascii || issocket || isdraw) {
762     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
763   } else if (isbinary) {
764     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "MatGetInfo_MPIDense"
771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772 {
773   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
774   Mat            mdn  = mat->A;
775   PetscErrorCode ierr;
776   PetscReal      isend[5],irecv[5];
777 
778   PetscFunctionBegin;
779   info->block_size = 1.0;
780 
781   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
782 
783   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
784   isend[3] = info->memory;  isend[4] = info->mallocs;
785   if (flag == MAT_LOCAL) {
786     info->nz_used      = isend[0];
787     info->nz_allocated = isend[1];
788     info->nz_unneeded  = isend[2];
789     info->memory       = isend[3];
790     info->mallocs      = isend[4];
791   } else if (flag == MAT_GLOBAL_MAX) {
792     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
793 
794     info->nz_used      = irecv[0];
795     info->nz_allocated = irecv[1];
796     info->nz_unneeded  = irecv[2];
797     info->memory       = irecv[3];
798     info->mallocs      = irecv[4];
799   } else if (flag == MAT_GLOBAL_SUM) {
800     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
801 
802     info->nz_used      = irecv[0];
803     info->nz_allocated = irecv[1];
804     info->nz_unneeded  = irecv[2];
805     info->memory       = irecv[3];
806     info->mallocs      = irecv[4];
807   }
808   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
809   info->fill_ratio_needed = 0;
810   info->factor_mallocs    = 0;
811   PetscFunctionReturn(0);
812 }
813 
814 #undef __FUNCT__
815 #define __FUNCT__ "MatSetOption_MPIDense"
816 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
817 {
818   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
819   PetscErrorCode ierr;
820 
821   PetscFunctionBegin;
822   switch (op) {
823   case MAT_NEW_NONZERO_LOCATIONS:
824   case MAT_NEW_NONZERO_LOCATION_ERR:
825   case MAT_NEW_NONZERO_ALLOCATION_ERR:
826     MatCheckPreallocated(A,1);
827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
828     break;
829   case MAT_ROW_ORIENTED:
830     MatCheckPreallocated(A,1);
831     a->roworiented = flg;
832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
833     break;
834   case MAT_NEW_DIAGONALS:
835   case MAT_KEEP_NONZERO_PATTERN:
836   case MAT_USE_HASH_TABLE:
837     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
838     break;
839   case MAT_IGNORE_OFF_PROC_ENTRIES:
840     a->donotstash = flg;
841     break;
842   case MAT_SYMMETRIC:
843   case MAT_STRUCTURALLY_SYMMETRIC:
844   case MAT_HERMITIAN:
845   case MAT_SYMMETRY_ETERNAL:
846   case MAT_IGNORE_LOWER_TRIANGULAR:
847     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
848     break;
849   default:
850     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 
856 #undef __FUNCT__
857 #define __FUNCT__ "MatDiagonalScale_MPIDense"
858 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
859 {
860   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
861   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
862   PetscScalar    *l,*r,x,*v;
863   PetscErrorCode ierr;
864   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
865 
866   PetscFunctionBegin;
867   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
868   if (ll) {
869     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
870     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
871     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
872     for (i=0; i<m; i++) {
873       x = l[i];
874       v = mat->v + i;
875       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
876     }
877     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
878     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
879   }
880   if (rr) {
881     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
882     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
883     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
886     for (i=0; i<n; i++) {
887       x = r[i];
888       v = mat->v + i*m;
889       for (j=0; j<m; j++) (*v++) *= x;
890     }
891     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
892     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
893   }
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatNorm_MPIDense"
899 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
900 {
901   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
902   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
903   PetscErrorCode ierr;
904   PetscInt       i,j;
905   PetscReal      sum = 0.0;
906   PetscScalar    *v  = mat->v;
907 
908   PetscFunctionBegin;
909   if (mdn->size == 1) {
910     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
911   } else {
912     if (type == NORM_FROBENIUS) {
913       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
914         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
915       }
916       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
917       *nrm = PetscSqrtReal(*nrm);
918       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
919     } else if (type == NORM_1) {
920       PetscReal *tmp,*tmp2;
921       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
922       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
923       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
924       *nrm = 0.0;
925       v    = mat->v;
926       for (j=0; j<mdn->A->cmap->n; j++) {
927         for (i=0; i<mdn->A->rmap->n; i++) {
928           tmp[j] += PetscAbsScalar(*v);  v++;
929         }
930       }
931       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
932       for (j=0; j<A->cmap->N; j++) {
933         if (tmp2[j] > *nrm) *nrm = tmp2[j];
934       }
935       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
936       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
937     } else if (type == NORM_INFINITY) { /* max row norm */
938       PetscReal ntemp;
939       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
940       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
941     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for the two norm");
942   }
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatTranspose_MPIDense"
948 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
949 {
950   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
951   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
952   Mat            B;
953   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
954   PetscErrorCode ierr;
955   PetscInt       j,i;
956   PetscScalar    *v;
957 
958   PetscFunctionBegin;
959   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
960   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
961     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
962     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
963     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
964     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
965   } else {
966     B = *matout;
967   }
968 
969   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
970   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
971   for (i=0; i<m; i++) rwork[i] = rstart + i;
972   for (j=0; j<n; j++) {
973     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
974     v   += m;
975   }
976   ierr = PetscFree(rwork);CHKERRQ(ierr);
977   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
980     *matout = B;
981   } else {
982     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
983   }
984   PetscFunctionReturn(0);
985 }
986 
987 
988 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
989 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatSetUp_MPIDense"
993 PetscErrorCode MatSetUp_MPIDense(Mat A)
994 {
995   PetscErrorCode ierr;
996 
997   PetscFunctionBegin;
998   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatAXPY_MPIDense"
1004 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1005 {
1006   PetscErrorCode ierr;
1007   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1011   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatConjugate_MPIDense"
1017 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1018 {
1019   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1020   PetscErrorCode ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatRealPart_MPIDense"
1029 PetscErrorCode MatRealPart_MPIDense(Mat A)
1030 {
1031   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1032   PetscErrorCode ierr;
1033 
1034   PetscFunctionBegin;
1035   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1041 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1042 {
1043   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1044   PetscErrorCode ierr;
1045 
1046   PetscFunctionBegin;
1047   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1054 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1055 {
1056   PetscErrorCode ierr;
1057   PetscInt       i,n;
1058   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1059   PetscReal      *work;
1060 
1061   PetscFunctionBegin;
1062   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1063   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1064   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1065   if (type == NORM_2) {
1066     for (i=0; i<n; i++) work[i] *= work[i];
1067   }
1068   if (type == NORM_INFINITY) {
1069     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1070   } else {
1071     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1072   }
1073   ierr = PetscFree(work);CHKERRQ(ierr);
1074   if (type == NORM_2) {
1075     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1076   }
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatSetRandom_MPIDense"
1082 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1083 {
1084   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1085   PetscErrorCode ierr;
1086   PetscScalar    *a;
1087   PetscInt       m,n,i;
1088 
1089   PetscFunctionBegin;
1090   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1091   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1092   for (i=0; i<m*n; i++) {
1093     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1094   }
1095   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatMissingDiagonal_MPIDense"
1103 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1104 {
1105   PetscFunctionBegin;
1106   *missing = PETSC_FALSE;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 /* -------------------------------------------------------------------*/
1111 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1112                                         MatGetRow_MPIDense,
1113                                         MatRestoreRow_MPIDense,
1114                                         MatMult_MPIDense,
1115                                 /*  4*/ MatMultAdd_MPIDense,
1116                                         MatMultTranspose_MPIDense,
1117                                         MatMultTransposeAdd_MPIDense,
1118                                         0,
1119                                         0,
1120                                         0,
1121                                 /* 10*/ 0,
1122                                         0,
1123                                         0,
1124                                         0,
1125                                         MatTranspose_MPIDense,
1126                                 /* 15*/ MatGetInfo_MPIDense,
1127                                         MatEqual_MPIDense,
1128                                         MatGetDiagonal_MPIDense,
1129                                         MatDiagonalScale_MPIDense,
1130                                         MatNorm_MPIDense,
1131                                 /* 20*/ MatAssemblyBegin_MPIDense,
1132                                         MatAssemblyEnd_MPIDense,
1133                                         MatSetOption_MPIDense,
1134                                         MatZeroEntries_MPIDense,
1135                                 /* 24*/ MatZeroRows_MPIDense,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                 /* 29*/ MatSetUp_MPIDense,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                 /* 34*/ MatDuplicate_MPIDense,
1146                                         0,
1147                                         0,
1148                                         0,
1149                                         0,
1150                                 /* 39*/ MatAXPY_MPIDense,
1151                                         MatGetSubMatrices_MPIDense,
1152                                         0,
1153                                         MatGetValues_MPIDense,
1154                                         0,
1155                                 /* 44*/ 0,
1156                                         MatScale_MPIDense,
1157                                         MatShift_Basic,
1158                                         0,
1159                                         0,
1160                                 /* 49*/ MatSetRandom_MPIDense,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                 /* 54*/ 0,
1166                                         0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                 /* 59*/ MatGetSubMatrix_MPIDense,
1171                                         MatDestroy_MPIDense,
1172                                         MatView_MPIDense,
1173                                         0,
1174                                         0,
1175                                 /* 64*/ 0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                 /* 69*/ 0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /* 74*/ 0,
1186                                         0,
1187                                         0,
1188                                         0,
1189                                         0,
1190                                 /* 79*/ 0,
1191                                         0,
1192                                         0,
1193                                         0,
1194                                 /* 83*/ MatLoad_MPIDense,
1195                                         0,
1196                                         0,
1197                                         0,
1198                                         0,
1199                                         0,
1200 #if defined(PETSC_HAVE_ELEMENTAL)
1201                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1202                                         MatMatMultSymbolic_MPIDense_MPIDense,
1203 #else
1204                                 /* 89*/ 0,
1205                                         0,
1206 #endif
1207                                         MatMatMultNumeric_MPIDense,
1208                                         0,
1209                                         0,
1210                                 /* 94*/ 0,
1211                                         0,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                 /* 99*/ 0,
1216                                         0,
1217                                         0,
1218                                         MatConjugate_MPIDense,
1219                                         0,
1220                                 /*104*/ 0,
1221                                         MatRealPart_MPIDense,
1222                                         MatImaginaryPart_MPIDense,
1223                                         0,
1224                                         0,
1225                                 /*109*/ 0,
1226                                         0,
1227                                         0,
1228                                         0,
1229                                         MatMissingDiagonal_MPIDense,
1230                                 /*114*/ 0,
1231                                         0,
1232                                         0,
1233                                         0,
1234                                         0,
1235                                 /*119*/ 0,
1236                                         0,
1237                                         0,
1238                                         0,
1239                                         0,
1240                                 /*124*/ 0,
1241                                         MatGetColumnNorms_MPIDense,
1242                                         0,
1243                                         0,
1244                                         0,
1245                                 /*129*/ 0,
1246                                         MatTransposeMatMult_MPIDense_MPIDense,
1247                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1248                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1249                                         0,
1250                                 /*134*/ 0,
1251                                         0,
1252                                         0,
1253                                         0,
1254                                         0,
1255                                 /*139*/ 0,
1256                                         0,
1257                                         0
1258 };
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1262 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1263 {
1264   Mat_MPIDense   *a;
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   mat->preallocated = PETSC_TRUE;
1269   /* Note: for now, when data is specified, this assumes that the user correctly
1270      allocates the local dense storage space. We should add error checking. */
1271 
1272   a       = (Mat_MPIDense*)mat->data;
1273   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1274   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1275   a->nvec = mat->cmap->n;
1276 
1277   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1278   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1279   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1280   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1281   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #if defined(PETSC_HAVE_ELEMENTAL)
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatConvert_MPIDense_Elemental"
1288 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1289 {
1290   Mat            mat_elemental;
1291   PetscErrorCode ierr;
1292   PetscScalar    *v;
1293   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1294 
1295   PetscFunctionBegin;
1296   if (reuse == MAT_REUSE_MATRIX) {
1297     mat_elemental = *newmat;
1298     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1299   } else {
1300     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1301     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1302     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1303     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1304     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1305   }
1306 
1307   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1308   for (i=0; i<N; i++) cols[i] = i;
1309   for (i=0; i<m; i++) rows[i] = rstart + i;
1310 
1311   /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
1312   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1313   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1314   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1315   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1316   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1317   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1318 
1319   if (reuse == MAT_INPLACE_MATRIX) {
1320     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1321   } else {
1322     *newmat = mat_elemental;
1323   }
1324   PetscFunctionReturn(0);
1325 }
1326 #endif
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatCreate_MPIDense"
1330 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1331 {
1332   Mat_MPIDense   *a;
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1337   mat->data = (void*)a;
1338   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1339 
1340   mat->insertmode = NOT_SET_VALUES;
1341   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1342   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1343 
1344   /* build cache for off-processor entries formed during assembly */
1345   a->donotstash = PETSC_FALSE;
1346 
1347   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1348 
1349   /* stuff used for matrix vector multiply */
1350   a->lvec        = 0;
1351   a->Mvctx       = 0;
1352   a->roworiented = PETSC_TRUE;
1353 
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1355   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1356 #if defined(PETSC_HAVE_ELEMENTAL)
1357   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1358 #endif
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1363   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1364 
1365   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1366   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1367   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1368   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 /*MC
1373    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1374 
1375    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1376    and MATMPIDENSE otherwise.
1377 
1378    Options Database Keys:
1379 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1380 
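   Example Usage:
   A minimal sketch, assuming m and n are the desired local sizes (names chosen for illustration only):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);
   MatSetType(A,MATDENSE);   /* resolves to MATSEQDENSE on one process, MATMPIDENSE otherwise */
   MatSetUp(A);
.ve
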
1381   Level: beginner
1382 
1383 
1384 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1385 M*/
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1389 /*@C
1390    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1391 
1392    Not collective
1393 
1394    Input Parameters:
1395 +  B - the matrix
1396 -  data - optional location of matrix data.  Set data=NULL for PETSc
1397    to control all matrix memory allocation.
1398 
1399    Notes:
1400    The dense format is fully compatible with standard Fortran 77
1401    storage by columns.
1402 
1403    The data input variable is intended primarily for Fortran programmers
1404    who wish to allocate their own matrix memory space.  Most users should
1405    set data=NULL.
1406 
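   Example Usage:
   A minimal sketch, assuming B was already created with MatCreate() and MatSetSizes():
.vb
   MatSetType(B,MATMPIDENSE);
   MatMPIDenseSetPreallocation(B,NULL);   /* PETSc allocates the local column-major storage */
.ve
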
1407    Level: intermediate
1408 
1409 .keywords: matrix,dense, parallel
1410 
1411 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1412 @*/
1413 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1414 {
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatCreateDense"
1424 /*@C
1425    MatCreateDense - Creates a parallel matrix in dense format.
1426 
1427    Collective on MPI_Comm
1428 
1429    Input Parameters:
1430 +  comm - MPI communicator
1431 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1432 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1433 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1434 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1435 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1436    to control all matrix memory allocation.
1437 
1438    Output Parameter:
1439 .  A - the matrix
1440 
1441    Notes:
1442    The dense format is fully compatible with standard Fortran 77
1443    storage by columns.
1444 
1445    The data input variable is intended primarily for Fortran programmers
1446    who wish to allocate their own matrix memory space.  Most users should
1447    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1448 
1449    The user MUST specify either the local or global matrix dimensions
1450    (possibly both).
1451 
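   Example Usage:
   A minimal sketch that creates a 100 x 100 dense matrix, letting PETSc pick the local sizes and allocate the storage:
.vb
   Mat A;
   MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
   /* ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
   MatDestroy(&A);
.ve
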
1452    Level: intermediate
1453 
1454 .keywords: matrix,dense, parallel
1455 
1456 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1457 @*/
1458 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1459 {
1460   PetscErrorCode ierr;
1461   PetscMPIInt    size;
1462 
1463   PetscFunctionBegin;
1464   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1465   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1466   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1467   if (size > 1) {
1468     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1469     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1470     if (data) {  /* user provided data array, so no need to assemble */
1471       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1472       (*A)->assembled = PETSC_TRUE;
1473     }
1474   } else {
1475     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1476     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatDuplicate_MPIDense"
1483 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1484 {
1485   Mat            mat;
1486   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   *newmat = 0;
1491   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1492   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1493   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1494   a       = (Mat_MPIDense*)mat->data;
1495   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1496 
1497   mat->factortype   = A->factortype;
1498   mat->assembled    = PETSC_TRUE;
1499   mat->preallocated = PETSC_TRUE;
1500 
1501   a->size         = oldmat->size;
1502   a->rank         = oldmat->rank;
1503   mat->insertmode = NOT_SET_VALUES;
1504   a->nvec         = oldmat->nvec;
1505   a->donotstash   = oldmat->donotstash;
1506 
1507   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1508   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1509 
1510   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1511   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1512   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1513 
1514   *newmat = mat;
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1520 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1521 {
1522   PetscErrorCode ierr;
1523   PetscMPIInt    rank,size;
1524   const PetscInt *rowners;
1525   PetscInt       i,m,n,nz,j,mMax;
1526   PetscScalar    *array,*vals,*vals_ptr;
1527   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1528 
1529   PetscFunctionBegin;
1530   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1531   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1532 
1533   /* determine ownership of rows and columns */
1534   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1535   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1536 
1537   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1538   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1539     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1540   }
1541   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1542   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1543   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
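  /* mMax is the largest number of local rows on any process, so rank 0 can reuse a single buffer for every chunk it reads */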
1544   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1545   if (!rank) {
1546     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1547 
1548     /* read in my part of the matrix numerical values  */
1549     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1550 
1551     /* insert values by row: the file stores the matrix by rows but array is column-major, so we cannot read directly into array */
1552     vals_ptr = vals;
1553     for (i=0; i<m; i++) {
1554       for (j=0; j<N; j++) {
1555         array[i + j*m] = *vals_ptr++;
1556       }
1557     }
1558 
1559     /* read in the other processes' parts and ship them out */
1560     for (i=1; i<size; i++) {
1561       nz   = (rowners[i+1] - rowners[i])*N;
1562       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1563       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1564     }
1565   } else {
1566     /* receive numeric values */
1567     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1568 
1569     /* receive message of values */
1570     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1571 
1572     /* insert values by row: the file stores the matrix by rows but array is column-major, so we cannot read directly into array */
1573     vals_ptr = vals;
1574     for (i=0; i<m; i++) {
1575       for (j=0; j<N; j++) {
1576         array[i + j*m] = *vals_ptr++;
1577       }
1578     }
1579   }
1580   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1581   ierr = PetscFree(vals);CHKERRQ(ierr);
1582   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatLoad_MPIDense"
1589 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1590 {
1591   Mat_MPIDense   *a;
1592   PetscScalar    *vals,*svals;
1593   MPI_Comm       comm;
1594   MPI_Status     status;
1595   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1596   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1597   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1598   PetscInt       i,nz,j,rstart,rend;
1599   int            fd;
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   /* force binary viewer to load .info file if it has not yet done so */
1604   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1605   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1606   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1607   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1608   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1609   if (!rank) {
1610     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1611     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
1612   }
1613   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1614   M    = header[1]; N = header[2]; nz = header[3];
1615 
1616   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1617   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1618   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1619 
1620   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1621   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1622 
1623   /*
1624        Handle case where matrix is stored on disk as a dense matrix
1625   */
1626   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1627     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1628     PetscFunctionReturn(0);
1629   }
1630 
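  /* From here on the file holds a sparse (AIJ-style) matrix: row lengths, then column indices, then
     values. Rank 0 reads each process's chunk and ships it out; entries go in via MatSetValues() */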
1631   /* determine ownership of all rows */
1632   if (newmat->rmap->n < 0) {
1633     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1634   } else {
1635     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1636   }
1637   if (newmat->cmap->n < 0) {
1638     n = PETSC_DECIDE;
1639   } else {
1640     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1641   }
1642 
1643   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1644   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1645   rowners[0] = 0;
1646   for (i=2; i<=size; i++) {
1647     rowners[i] += rowners[i-1];
1648   }
1649   rstart = rowners[rank];
1650   rend   = rowners[rank+1];
1651 
1652   /* distribute row lengths to all processors */
1653   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1654   if (!rank) {
1655     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1656     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1657     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1658     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1659     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1660     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1661   } else {
1662     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1663   }
1664 
1665   if (!rank) {
1666     /* calculate the number of nonzeros on each processor */
1667     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1668     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1669     for (i=0; i<size; i++) {
1670       for (j=rowners[i]; j< rowners[i+1]; j++) {
1671         procsnz[i] += rowlengths[j];
1672       }
1673     }
1674     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1675 
1676     /* determine max buffer needed and allocate it */
1677     maxnz = 0;
1678     for (i=0; i<size; i++) {
1679       maxnz = PetscMax(maxnz,procsnz[i]);
1680     }
1681     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1682 
1683     /* read in my part of the matrix column indices  */
1684     nz   = procsnz[0];
1685     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1686     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1687 
1688     /* read in everyone else's parts and ship them off */
1689     for (i=1; i<size; i++) {
1690       nz   = procsnz[i];
1691       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1692       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1693     }
1694     ierr = PetscFree(cols);CHKERRQ(ierr);
1695   } else {
1696     /* determine buffer space needed for message */
1697     nz = 0;
1698     for (i=0; i<m; i++) {
1699       nz += ourlens[i];
1700     }
1701     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1702 
1703     /* receive message of column indices */
1704     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1705     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1706     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1707   }
1708 
1709   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1710   a = (Mat_MPIDense*)newmat->data;
1711   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1712     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1713   }
1714 
1715   if (!rank) {
1716     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1717 
1718     /* read in my part of the matrix numerical values  */
1719     nz   = procsnz[0];
1720     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1721 
1722     /* insert into matrix */
1723     jj      = rstart;
1724     smycols = mycols;
1725     svals   = vals;
1726     for (i=0; i<m; i++) {
1727       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1728       smycols += ourlens[i];
1729       svals   += ourlens[i];
1730       jj++;
1731     }
1732 
1733     /* read in the other processes' parts and ship them out */
1734     for (i=1; i<size; i++) {
1735       nz   = procsnz[i];
1736       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1737       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1738     }
1739     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1740   } else {
1741     /* receive numeric values */
1742     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1743 
1744     /* receive message of values */
1745     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1746     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1747     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1748 
1749     /* insert into matrix */
1750     jj      = rstart;
1751     smycols = mycols;
1752     svals   = vals;
1753     for (i=0; i<m; i++) {
1754       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1755       smycols += ourlens[i];
1756       svals   += ourlens[i];
1757       jj++;
1758     }
1759   }
1760   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1761   ierr = PetscFree(vals);CHKERRQ(ierr);
1762   ierr = PetscFree(mycols);CHKERRQ(ierr);
1763   ierr = PetscFree(rowners);CHKERRQ(ierr);
1764 
1765   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1766   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatEqual_MPIDense"
1772 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1773 {
1774   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1775   Mat            a,b;
1776   PetscBool      flg;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   a    = matA->A;
1781   b    = matB->A;
1782   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1783   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
1789 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1790 {
1791   PetscErrorCode        ierr;
1792   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1793   Mat_TransMatMultDense *atb = a->atbdense;
1794 
1795   PetscFunctionBegin;
1796   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1797   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1798   ierr = PetscFree(atb);CHKERRQ(ierr);
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
1804 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1805 {
1806   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1807   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1808   Mat_TransMatMultDense *atb = c->atbdense;
1809   PetscErrorCode ierr;
1810   MPI_Comm       comm;
1811   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1812   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1813   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1814   PetscScalar    _DOne=1.0,_DZero=0.0;
1815   PetscBLASInt   am,an,bn,aN;
1816   const PetscInt *ranges;
1817 
1818   PetscFunctionBegin;
1819   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1820   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1822 
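  /* Each process computes the full cM-by-cN product of its local row blocks, A_loc^T * B_loc; the
     MPI_Reduce_scatter below then sums these contributions and leaves each process its owned rows of C */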
1823   /* compute atbarray = aseq^T * bseq */
1824   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1825   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1826   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1827   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1828   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1829 
1830   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1831   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1832 
1833   /* pack atbarray into sendbuf, grouping the entries by destination process */
1834   k = 0;
1835   for (proc=0; proc<size; proc++) {
1836     for (j=0; j<cN; j++) {
1837       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1838     }
1839   }
1840   /* sum the contributions from all processes into the local values of C */
1841   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1842   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1843   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
1849 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1850 {
1851   PetscErrorCode        ierr;
1852   Mat                   Cdense;
1853   MPI_Comm              comm;
1854   PetscMPIInt           size;
1855   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1856   Mat_MPIDense          *c;
1857   Mat_TransMatMultDense *atb;
1858 
1859   PetscFunctionBegin;
1860   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1861   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1862     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1863   }
1864 
1865   /* create matrix product Cdense */
1866   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1867   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1868   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1869   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1870   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1871   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1872   *C   = Cdense;
1873 
1874   /* create data structure for reusing Cdense */
1875   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1876   ierr = PetscNew(&atb);CHKERRQ(ierr);
1877   cM = Cdense->rmap->N;
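  /* sendbuf and atbarray each hold a full cM-by-cN copy of the product; recvcounts[] holds, per process,
     the number of entries of C that process owns */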
1878   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1879 
1880   c                    = (Mat_MPIDense*)Cdense->data;
1881   c->atbdense          = atb;
1882   atb->destroy         = Cdense->ops->destroy;
1883   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
1889 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1890 {
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   if (scall == MAT_INITIAL_MATRIX) {
1895     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1896   }
1897   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
1903 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1904 {
1905   PetscErrorCode   ierr;
1906   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1907   Mat_MatMultDense *ab = a->abdense;
1908 
1909   PetscFunctionBegin;
1910   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1911   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1912   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1913 
1914   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1915   ierr = PetscFree(ab);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #if defined(PETSC_HAVE_ELEMENTAL)
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1922 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1923 {
1924   PetscErrorCode   ierr;
1925   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1926   Mat_MatMultDense *ab=c->abdense;
1927 
1928   PetscFunctionBegin;
1929   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1930   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1931   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1932   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1938 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1939 {
1940   PetscErrorCode   ierr;
1941   Mat              Ae,Be,Ce;
1942   Mat_MPIDense     *c;
1943   Mat_MatMultDense *ab;
1944 
1945   PetscFunctionBegin;
1946   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1947     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1948   }
1949 
1950   /* convert A and B to Elemental matrices Ae and Be */
1951   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1952   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1953 
1954   /* Ce = Ae*Be */
1955   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1956   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1957 
1958   /* convert Ce to C */
1959   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1960 
1961   /* create data structure for reusing the product matrix */
1962   ierr = PetscNew(&ab);CHKERRQ(ierr);
1963   c                  = (Mat_MPIDense*)(*C)->data;
1964   c->abdense         = ab;
1965 
1966   ab->Ae             = Ae;
1967   ab->Be             = Be;
1968   ab->Ce             = Ce;
1969   ab->destroy        = (*C)->ops->destroy;
1970   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1971   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1977 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1978 {
1979   PetscErrorCode ierr;
1980 
1981   PetscFunctionBegin;
1982   if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
1983     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1984     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1985     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1986   } else {
1987     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1988     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1989     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1990   }
1991   PetscFunctionReturn(0);
1992 }
1993 #endif
1994