xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 70baa948e42e6bb1660071283af10f413c9269b6)

/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
              matrix that represents the locally owned portion of the operator. For a sequential
              matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

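/*
   Usage sketch (illustrative only, not part of the PETSc sources): grab the
   local MATSEQDENSE block of a parallel dense matrix so a sequential-only
   routine can be applied to the rows owned by this process.  M and N are
   assumed global sizes; error checking is elided.

      Mat A,Aloc;

      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
      ... set values and assemble A ...
      MatDenseGetLocalMatrix(A,&Aloc);
      ... Aloc is the m-by-N MATSEQDENSE block owned by this rank ...
*/
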
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

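/*
   Usage sketch (illustrative only): inserting a dense 2x2 block.  With the
   default row-oriented ordering the values array is read row by row; entries
   destined for rows owned by other processes are stashed and shipped at
   assembly time, as implemented above.

      PetscInt    rows[2] = {0,1},cols[2] = {0,1};
      PetscScalar vals[4] = {1.0,2.0,3.0,4.0};

      MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/
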
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

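/*
   Usage sketch (illustrative only): zero a set of global rows, place 1.0 on
   their diagonals, and fix up the right-hand side so that a known solution x
   is preserved.  As noted above, this implementation requires a square matrix
   with identical row and column ownership.

      PetscInt rows[2] = {0,5};

      MatZeroRows(A,2,rows,1.0,x,b);
      ... rows 0 and 5 are now zero except for a unit diagonal, and
          b[0] = x[0], b[5] = x[5] ...
*/
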
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

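/*
   Usage sketch (illustrative only): the multiply routines above first scatter
   the distributed input vector into the local work vector lvec, then apply
   the local dense block.  Vectors with compatible layouts are easiest to
   obtain from the matrix itself:

      Vec x,y;

      MatCreateVecs(A,&x,&y);
      ... fill x ...
      MatMult(A,x,y);
*/
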
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

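/*
   Usage sketch (illustrative only): filling a dense matrix with random
   entries through the public MatSetRandom() interface, which dispatches to
   the routine above for MATMPIDENSE.

      PetscRandom r;

      PetscRandomCreate(PETSC_COMM_WORLD,&r);
      PetscRandomSetFromOptions(r);
      MatSetRandom(A,r);
      PetscRandomDestroy(&r);
*/
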
extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPIDense"
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified, this assumes that the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1371 /*MC
1372    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1373 
1374    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1375    and MATMPIDENSE otherwise.
1376 
1377    Options Database Keys:
1378 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1379 
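   Example usage (a minimal sketch; M and N here stand for the desired global sizes):
.vb
      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATDENSE);
      MatSetFromOptions(A);   /* -mat_type dense takes effect here */
      MatSetUp(A);
.ve
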
1380   Level: beginner
1381 
1382 
1383 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1384 M*/
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1388 /*@C
1389    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1390 
1391    Not collective
1392 
1393    Input Parameters:
1394 +  B - the matrix
1395 -  data - optional location of matrix data.  Set data=NULL for PETSc
1396    to control all matrix memory allocation.
1397 
1398    Notes:
1399    The dense format is fully compatible with standard Fortran 77
1400    storage by columns.
1401 
1402    The data input variable is intended primarily for Fortran programmers
1403    who wish to allocate their own matrix memory space.  Most users should
1404    set data=NULL.
1405 
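   A sketch of use with a user-provided array (hypothetical setup: m is the
   number of local rows, N the number of global columns, and the array is
   column-major with leading dimension m):
.vb
      PetscScalar *data;
      PetscMalloc1(m*N,&data);
      MatMPIDenseSetPreallocation(B,data);
.ve
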
1406    Level: intermediate
1407 
1408 .keywords: matrix,dense, parallel
1409 
1410 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1411 @*/
1412 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1413 {
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatCreateDense"
1423 /*@C
1424    MatCreateDense - Creates a parallel matrix in dense format.
1425 
1426    Collective on MPI_Comm
1427 
1428    Input Parameters:
1429 +  comm - MPI communicator
1430 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1431 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1432 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1433 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1434 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1435    to control all matrix memory allocation.
1436 
1437    Output Parameter:
1438 .  A - the matrix
1439 
1440    Notes:
1441    The dense format is fully compatible with standard Fortran 77
1442    storage by columns.
1443 
1444    The data input variable is intended primarily for Fortran programmers
1445    who wish to allocate their own matrix memory space.  Most users should
1446    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1447 
1448    The user MUST specify either the local or global matrix dimensions
1449    (possibly both).
1450 
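   Example usage (a minimal sketch with PETSc-managed storage; M and N are the
   desired global sizes):
.vb
      Mat A;
      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
      /* set entries with MatSetValues(), then MatAssemblyBegin/End() */
      MatDestroy(&A);
.ve
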
1451    Level: intermediate
1452 
1453 .keywords: matrix,dense, parallel
1454 
1455 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1456 @*/
1457 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1458 {
1459   PetscErrorCode ierr;
1460   PetscMPIInt    size;
1461 
1462   PetscFunctionBegin;
1463   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1464   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1465   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1466   if (size > 1) {
1467     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1468     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1469     if (data) {  /* user provided data array, so no need to assemble */
1470       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1471       (*A)->assembled = PETSC_TRUE;
1472     }
1473   } else {
1474     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1475     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatDuplicate_MPIDense"
1482 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1483 {
1484   Mat            mat;
1485   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1486   PetscErrorCode ierr;
1487 
1488   PetscFunctionBegin;
1489   *newmat = 0;
1490   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1491   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1492   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1493   a       = (Mat_MPIDense*)mat->data;
1494   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1495 
1496   mat->factortype   = A->factortype;
1497   mat->assembled    = PETSC_TRUE;
1498   mat->preallocated = PETSC_TRUE;
1499 
1500   a->size         = oldmat->size;
1501   a->rank         = oldmat->rank;
1502   mat->insertmode = NOT_SET_VALUES;
1503   a->nvec         = oldmat->nvec;
1504   a->donotstash   = oldmat->donotstash;
1505 
1506   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1507   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1508 
1509   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1510   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1511   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1512 
1513   *newmat = mat;
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1519 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1520 {
1521   PetscErrorCode ierr;
1522   PetscMPIInt    rank,size;
1523   const PetscInt *rowners;
1524   PetscInt       i,m,n,nz,j,mMax;
1525   PetscScalar    *array,*vals,*vals_ptr;
1526   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1527 
1528   PetscFunctionBegin;
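  /* Note: the file stores the dense matrix by rows, with each rank's row block
     stored consecutively, so rank 0 reads its own block first, then reads each
     remaining rank's block in order and ships it out; each block is then
     transposed into the column-major local array. */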
1529   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1531 
1532   /* determine ownership of rows and columns */
1533   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1534   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1535 
1536   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1537   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1538     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1539   }
1540   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1541   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1542   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1543   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1544   if (!rank) {
1545     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1546 
1547     /* read in my part of the matrix numerical values  */
1548     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1549 
1550     /* insert into the matrix row by row: the file stores values by rows while the local array is column-major (this is why we cannot read directly into the array) */
1551     vals_ptr = vals;
1552     for (i=0; i<m; i++) {
1553       for (j=0; j<N; j++) {
1554         array[i + j*m] = *vals_ptr++;
1555       }
1556     }
1557 
1558     /* read in the other processors' parts and ship them out */
1559     for (i=1; i<size; i++) {
1560       nz   = (rowners[i+1] - rowners[i])*N;
1561       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1562       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1563     }
1564   } else {
1565     /* receive numeric values */
1566     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1567 
1568     /* receive message of values */
1569     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1570 
1571     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1572     vals_ptr = vals;
1573     for (i=0; i<m; i++) {
1574       for (j=0; j<N; j++) {
1575         array[i + j*m] = *vals_ptr++;
1576       }
1577     }
1578   }
1579   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1580   ierr = PetscFree(vals);CHKERRQ(ierr);
1581   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatLoad_MPIDense"
1588 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1589 {
1590   Mat_MPIDense   *a;
1591   PetscScalar    *vals,*svals;
1592   MPI_Comm       comm;
1593   MPI_Status     status;
1594   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1595   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1596   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1597   PetscInt       i,nz,j,rstart,rend;
1598   int            fd;
1599   PetscErrorCode ierr;
1600 
1601   PetscFunctionBegin;
1602   /* force binary viewer to load .info file if it has not yet done so */
1603   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1604   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1605   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1606   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1607   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1608   if (!rank) {
1609     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1610     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1611   }
1612   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1613   M    = header[1]; N = header[2]; nz = header[3];
1614 
1615   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1616   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1617   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1618 
1619   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1620   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1621 
1622   /*
1623        Handle case where matrix is stored on disk as a dense matrix
1624   */
1625   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1626     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1627     PetscFunctionReturn(0);
1628   }
1629 
1630   /* determine ownership of all rows */
1631   if (newmat->rmap->n < 0) {
1632     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1633   } else {
1634     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1635   }
1636   if (newmat->cmap->n < 0) {
1637     n = PETSC_DECIDE;
1638   } else {
1639     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1640   }
1641 
1642   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1643   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1644   rowners[0] = 0;
1645   for (i=2; i<=size; i++) {
1646     rowners[i] += rowners[i-1];
1647   }
1648   rstart = rowners[rank];
1649   rend   = rowners[rank+1];
1650 
1651   /* distribute row lengths to all processors */
1652   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1653   if (!rank) {
1654     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1655     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1656     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1657     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1658     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1659     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1660   } else {
1661     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1662   }
1663 
1664   if (!rank) {
1665     /* calculate the number of nonzeros on each processor */
1666     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1667     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1668     for (i=0; i<size; i++) {
1669       for (j=rowners[i]; j< rowners[i+1]; j++) {
1670         procsnz[i] += rowlengths[j];
1671       }
1672     }
1673     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1674 
1675     /* determine max buffer needed and allocate it */
1676     maxnz = 0;
1677     for (i=0; i<size; i++) {
1678       maxnz = PetscMax(maxnz,procsnz[i]);
1679     }
1680     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1681 
1682     /* read in my part of the matrix column indices  */
1683     nz   = procsnz[0];
1684     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1685     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1686 
1687     /* read in everyone else's parts and ship them off */
1688     for (i=1; i<size; i++) {
1689       nz   = procsnz[i];
1690       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1691       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1692     }
1693     ierr = PetscFree(cols);CHKERRQ(ierr);
1694   } else {
1695     /* determine buffer space needed for message */
1696     nz = 0;
1697     for (i=0; i<m; i++) {
1698       nz += ourlens[i];
1699     }
1700     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1701 
1702     /* receive message of column indices */
1703     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1704     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1705     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1706   }
1707 
1708   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1709   a = (Mat_MPIDense*)newmat->data;
1710   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1711     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1712   }
1713 
1714   if (!rank) {
1715     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1716 
1717     /* read in my part of the matrix numerical values  */
1718     nz   = procsnz[0];
1719     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1720 
1721     /* insert into matrix */
1722     jj      = rstart;
1723     smycols = mycols;
1724     svals   = vals;
1725     for (i=0; i<m; i++) {
1726       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1727       smycols += ourlens[i];
1728       svals   += ourlens[i];
1729       jj++;
1730     }
1731 
1732     /* read in the other processors' parts and ship them out */
1733     for (i=1; i<size; i++) {
1734       nz   = procsnz[i];
1735       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1736       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1737     }
1738     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1739   } else {
1740     /* receive numeric values */
1741     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1742 
1743     /* receive message of values */
1744     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1745     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1746     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1747 
1748     /* insert into matrix */
1749     jj      = rstart;
1750     smycols = mycols;
1751     svals   = vals;
1752     for (i=0; i<m; i++) {
1753       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1754       smycols += ourlens[i];
1755       svals   += ourlens[i];
1756       jj++;
1757     }
1758   }
1759   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1760   ierr = PetscFree(vals);CHKERRQ(ierr);
1761   ierr = PetscFree(mycols);CHKERRQ(ierr);
1762   ierr = PetscFree(rowners);CHKERRQ(ierr);
1763 
1764   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1765   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatEqual_MPIDense"
1771 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1772 {
1773   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1774   Mat            a,b;
1775   PetscBool      flg;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
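  /* compare the local blocks, then AND the result across all ranks */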
1779   a    = matA->A;
1780   b    = matB->A;
1781   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1782   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatDestroy_MatTransMatMult_MPIDense_MPIDense"
1788 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1789 {
1790   PetscErrorCode        ierr;
1791   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1792   Mat_TransMatMultDense *atb = a->atbdense;
1793 
1794   PetscFunctionBegin;
1795   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1796   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1797   ierr = PetscFree(atb);CHKERRQ(ierr);
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatTransposeMatMultNumeric_MPIDense_MPIDense"
1803 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1804 {
1805   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1806   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1807   Mat_TransMatMultDense *atb = c->atbdense;
1808   PetscErrorCode ierr;
1809   MPI_Comm       comm;
1810   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1811   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1812   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1813   PetscScalar    _DOne=1.0,_DZero=0.0;
1814   PetscBLASInt   am,an,bn,aN;
1815   const PetscInt *ranges;
1816 
1817   PetscFunctionBegin;
1818   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1819   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1820   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1821 
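  /* Algorithm sketch: every rank computes the full cM-by-cN partial product
     aseq^T * bseq from its local row blocks of A and B; the MPI_Reduce_scatter
     below then sums these partial products across ranks, leaving each rank
     with only the row block of C that it owns. */
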
1822   /* compute atbarray = aseq^T * bseq */
1823   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1824   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1825   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1826   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1827   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1828 
1829   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1830   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1831 
1832   /* arrange atbarray into sendbuf */
1833   k = 0;
1834   for (proc=0; proc<size; proc++) {
1835     for (j=0; j<cN; j++) {
1836       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1837     }
1838   }
1839   /* sum all atbarray to local values of C */
1840   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1841   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1842   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatTransposeMatMultSymbolic_MPIDense_MPIDense"
1848 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1849 {
1850   PetscErrorCode        ierr;
1851   Mat                   Cdense;
1852   MPI_Comm              comm;
1853   PetscMPIInt           size;
1854   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1855   Mat_MPIDense          *c;
1856   Mat_TransMatMultDense *atb;
1857 
1858   PetscFunctionBegin;
1859   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1860   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1861     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1862   }
1863 
1864   /* create matrix product Cdense */
1865   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1866   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1867   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1868   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1869   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1870   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1871   *C   = Cdense;
1872 
1873   /* create the data structure for reuse of Cdense */
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875   ierr = PetscNew(&atb);CHKERRQ(ierr);
1876   cM = Cdense->rmap->N;
1877   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1878 
1879   c                    = (Mat_MPIDense*)Cdense->data;
1880   c->atbdense          = atb;
1881   atb->destroy         = Cdense->ops->destroy;
1882   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatTransposeMatMult_MPIDense_MPIDense"
1888 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1889 {
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   if (scall == MAT_INITIAL_MATRIX) {
1894     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1895   }
1896   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
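
/* A usage sketch (C = A^T * B with A and B both MPIDENSE; the fill argument is
   not used by this dense implementation):

     Mat C;
     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/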
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatDestroy_MatMatMult_MPIDense_MPIDense"
1902 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1903 {
1904   PetscErrorCode   ierr;
1905   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1906   Mat_MatMultDense *ab = a->abdense;
1907 
1908   PetscFunctionBegin;
1909   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1910   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1911   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1912 
1913   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1914   ierr = PetscFree(ab);CHKERRQ(ierr);
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #if defined(PETSC_HAVE_ELEMENTAL)
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1921 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1922 {
1923   PetscErrorCode   ierr;
1924   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1925   Mat_MatMultDense *ab=c->abdense;
1926 
1927   PetscFunctionBegin;
1928   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1929   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1930   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1931   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1937 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1938 {
1939   PetscErrorCode   ierr;
1940   Mat              Ae,Be,Ce;
1941   Mat_MPIDense     *c;
1942   Mat_MatMultDense *ab;
1943 
1944   PetscFunctionBegin;
1945   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1946     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1947   }
1948 
1949   /* convert A and B to Elemental matrices Ae and Be */
1950   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1951   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1952 
1953   /* Ce = Ae*Be */
1954   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1955   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1956 
1957   /* convert Ce to C */
1958   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1959 
1960   /* create the data structure for reuse of Cdense */
1961   ierr = PetscNew(&ab);CHKERRQ(ierr);
1962   c                  = (Mat_MPIDense*)(*C)->data;
1963   c->abdense         = ab;
1964 
1965   ab->Ae             = Ae;
1966   ab->Be             = Be;
1967   ab->Ce             = Ce;
1968   ab->destroy        = (*C)->ops->destroy;
1969   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1970   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1976 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1977 {
1978   PetscErrorCode ierr;
1979 
1980   PetscFunctionBegin;
1981   if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
1982     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1983     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1984     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1985   } else {
1986     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1987     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1988     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1989   }
1990   PetscFunctionReturn(0);
1991 }
1992 #endif
1993