1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential (MATSEQDENSE)
14               matrix that stores the local portion of the operator. For a MATSEQDENSE matrix, returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
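/*
   Usage sketch (illustrative only, not part of the library source; "mat", "M" and "N" are
   hypothetical caller-side names): obtain the per-process MATSEQDENSE block of a dense
   matrix, independent of whether the matrix is sequential or parallel.

     Mat mat,lmat;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&mat);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(mat,&lmat);CHKERRQ(ierr);
     ... use lmat; it is a borrowed reference to the inner matrix, so do not destroy it ...
     ierr = MatDestroy(&mat);CHKERRQ(ierr);
*/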
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62 {
63   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
66   PetscScalar    *array;
67   MPI_Comm       comm;
68   Mat            B;
69 
70   PetscFunctionBegin;
71   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72 
73   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
74   if (!B) {
75     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
76     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
77     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
78     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
79     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
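    /* The local block mdn->A is m x N and stored column-major; assuming the default
       leading dimension (lda == m), array+m*rstart points at column rstart, so B
       aliases the square diagonal block A(rstart:rstart+m, rstart:rstart+m) without
       copying any values. */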
80     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
81     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
82     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
84     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
85     *a   = B;
86     ierr = MatDestroy(&B);CHKERRQ(ierr);
87   } else *a = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92 {
93   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96   PetscBool      roworiented = A->roworiented;
97 
98   PetscFunctionBegin;
99   for (i=0; i<m; i++) {
100     if (idxm[i] < 0) continue;
101     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102     if (idxm[i] >= rstart && idxm[i] < rend) {
103       row = idxm[i] - rstart;
104       if (roworiented) {
105         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
106       } else {
107         for (j=0; j<n; j++) {
108           if (idxn[j] < 0) continue;
109           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
111         }
112       }
113     } else if (!A->donotstash) {
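      /* Off-process row: cache the values in the matrix stash; they are sent to the
         owning process during MatAssemblyBegin()/MatAssemblyEnd(). */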
114       mat->assembled = PETSC_FALSE;
115       if (roworiented) {
116         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117       } else {
118         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
119       }
120     }
121   }
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
158 {
159   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
160   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
161   PetscErrorCode ierr;
162   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
163   const PetscInt *irow,*icol;
164   PetscScalar    *av,*bv,*v = lmat->v;
165   Mat            newmat;
166   IS             iscol_local;
167 
168   PetscFunctionBegin;
169   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
170   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
171   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
172   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
173   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
174   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
175 
176   /* No parallel redistribution currently supported! Should really check each index set
177      to confirm that it is OK.  ... Currently supports only submatrices with the same
178      row partitioning as the original matrix! */
179 
180   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
181   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
182 
183   /* Check submatrix call */
184   if (scall == MAT_REUSE_MATRIX) {
185     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
186     /* Really need to test row and column sizes! */
187     newmat = *B;
188   } else {
189     /* Create and fill new matrix */
190     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
191     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
192     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
193     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
194   }
195 
196   /* Now extract the data pointers and do the copy, column at a time */
197   newmatd = (Mat_MPIDense*)newmat->data;
198   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
199 
200   for (i=0; i<Ncols; i++) {
201     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
202     for (j=0; j<nrows; j++) {
203       *bv++ = av[irow[j] - rstart];
204     }
205   }
206 
207   /* Assemble the matrices so that the correct flags are set */
208   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
209   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
210 
211   /* Free work space */
212   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
213   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
214   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
215   *B   = newmat;
216   PetscFunctionReturn(0);
217 }
218 
219 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
220 {
221   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
222   PetscErrorCode ierr;
223 
224   PetscFunctionBegin;
225   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
230 {
231   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
232   MPI_Comm       comm;
233   PetscErrorCode ierr;
234   PetscInt       nstash,reallocs;
235   InsertMode     addv;
236 
237   PetscFunctionBegin;
238   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
239   /* make sure all processes use the same insert mode, i.e. either INSERT_VALUES or ADD_VALUES */
240   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
241   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
242   mat->insertmode = addv; /* in case this processor had no cache */
243 
244   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
245   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
246   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
250 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
251 {
252   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
253   PetscErrorCode ierr;
254   PetscInt       i,*row,*col,flg,j,rstart,ncols;
255   PetscMPIInt    n;
256   PetscScalar    *val;
257 
258   PetscFunctionBegin;
259   /*  wait on receives */
260   while (1) {
261     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
262     if (!flg) break;
263 
264     for (i=0; i<n;) {
265       /* Now identify the consecutive vals belonging to the same row */
266       for (j=i,rstart=row[j]; j<n; j++) {
267         if (row[j] != rstart) break;
268       }
269       if (j < n) ncols = j-i;
270       else       ncols = n-i;
271       /* Now assemble all these values with a single function call */
272       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
273       i    = j;
274     }
275   }
276   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
277 
278   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
279   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
280 
281   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
282     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
283   }
284   PetscFunctionReturn(0);
285 }
286 
287 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
288 {
289   PetscErrorCode ierr;
290   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
291 
292   PetscFunctionBegin;
293   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
294   PetscFunctionReturn(0);
295 }
296 
297 /* the code does not do the diagonal entries correctly unless the
298    matrix is square and the column and row ownerships are identical.
299    This is a BUG. The only way to fix it seems to be to access
300    the local matrix (mdn->A) directly instead of going through the MatZeroRows()
301    routine.
302 */
303 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
304 {
305   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
306   PetscErrorCode    ierr;
307   PetscInt          i,*owners = A->rmap->range;
308   PetscInt          *sizes,j,idx,nsends;
309   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
310   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
311   PetscInt          *lens,*lrows,*values;
312   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
313   MPI_Comm          comm;
314   MPI_Request       *send_waits,*recv_waits;
315   MPI_Status        recv_status,*send_status;
316   PetscBool         found;
317   const PetscScalar *xx;
318   PetscScalar       *bb;
319 
320   PetscFunctionBegin;
321   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
322   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
323   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
324   /*  first count number of contributors to each processor */
325   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
326   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
327   for (i=0; i<N; i++) {
328     idx   = rows[i];
329     found = PETSC_FALSE;
330     for (j=0; j<size; j++) {
331       if (idx >= owners[j] && idx < owners[j+1]) {
332         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
333       }
334     }
335     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
336   }
337   nsends = 0;
338   for (i=0; i<size; i++) nsends += sizes[2*i+1];
339 
340   /* inform other processors of number of messages and max length*/
341   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
342 
343   /* post receives:   */
344   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
345   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
346   for (i=0; i<nrecvs; i++) {
347     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
348   }
349 
350   /* do sends:
351       1) starts[i] gives the starting index in svalues for stuff going to
352          the ith processor
353   */
354   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
355   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
356   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
357 
358   starts[0] = 0;
359   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
360   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
361 
362   starts[0] = 0;
363   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
364   count = 0;
365   for (i=0; i<size; i++) {
366     if (sizes[2*i+1]) {
367       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
368     }
369   }
370   ierr = PetscFree(starts);CHKERRQ(ierr);
371 
372   base = owners[rank];
373 
374   /*  wait on receives */
375   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
376   count = nrecvs;
377   slen  = 0;
378   while (count) {
379     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
380     /* unpack receives into our local space */
381     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
382 
383     source[imdex] = recv_status.MPI_SOURCE;
384     lens[imdex]   = n;
385     slen += n;
386     count--;
387   }
388   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
389 
390   /* move the data into the send scatter */
391   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
392   count = 0;
393   for (i=0; i<nrecvs; i++) {
394     values = rvalues + i*nmax;
395     for (j=0; j<lens[i]; j++) {
396       lrows[count++] = values[j] - base;
397     }
398   }
399   ierr = PetscFree(rvalues);CHKERRQ(ierr);
400   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
401   ierr = PetscFree(owner);CHKERRQ(ierr);
402   ierr = PetscFree(sizes);CHKERRQ(ierr);
403 
404   /* fix right hand side if needed */
405   if (x && b) {
406     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
407     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
408     for (i=0; i<slen; i++) {
409       bb[lrows[i]] = diag*xx[lrows[i]];
410     }
411     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
412     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
413   }
414 
415   /* actually zap the local rows */
416   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
417   if (diag != 0.0) {
418     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
419     PetscInt     m   = ll->lda, i;
420 
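    /* Put diag on the global diagonal: local row lrows[i] corresponds to global row/column
       A->cmap->rstart + lrows[i]; the entry is addressed column-major with leading dimension m (= lda). */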
421     for (i=0; i<slen; i++) {
422       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
423     }
424   }
425   ierr = PetscFree(lrows);CHKERRQ(ierr);
426 
427   /* wait on sends */
428   if (nsends) {
429     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
430     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
431     ierr = PetscFree(send_status);CHKERRQ(ierr);
432   }
433   ierr = PetscFree(send_waits);CHKERRQ(ierr);
434   ierr = PetscFree(svalues);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
439 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
440 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
441 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
442 
443 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
444 {
445   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
446   PetscErrorCode ierr;
447 
448   PetscFunctionBegin;
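  /* Gather the entire input vector into the local work vector lvec, then apply the
     local m x N dense block; each process computes its rows of the product directly. */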
449   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
450   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
451   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
456 {
457   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
458   PetscErrorCode ierr;
459 
460   PetscFunctionBegin;
461   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
462   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
463   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
468 {
469   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
470   PetscErrorCode ierr;
471   PetscScalar    zero = 0.0;
472 
473   PetscFunctionBegin;
474   ierr = VecSet(yy,zero);CHKERRQ(ierr);
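  /* Each process forms the full-length partial product A_loc^T * x_loc in lvec; the
     reverse scatter with ADD_VALUES then sums these contributions into yy. */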
475   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
476   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
477   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
482 {
483   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
484   PetscErrorCode ierr;
485 
486   PetscFunctionBegin;
487   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
488   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
489   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
490   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
491   PetscFunctionReturn(0);
492 }
493 
494 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
495 {
496   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
497   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
498   PetscErrorCode ierr;
499   PetscInt       len,i,n,m = A->rmap->n,radd;
500   PetscScalar    *x,zero = 0.0;
501 
502   PetscFunctionBegin;
503   ierr = VecSet(v,zero);CHKERRQ(ierr);
504   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
505   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
506   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
507   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
508   radd = A->rmap->rstart*m;
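  /* The local block is stored column-major and the code assumes lda == m, so radd skips
     the first rstart columns and v[radd + i*m + i] is entry (i, rstart+i), i.e. the
     global diagonal entry owned by local row i. */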
509   for (i=0; i<len; i++) {
510     x[i] = aloc->v[radd + i*m + i];
511   }
512   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
515 
516 PetscErrorCode MatDestroy_MPIDense(Mat mat)
517 {
518   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
519   PetscErrorCode ierr;
520 
521   PetscFunctionBegin;
522 #if defined(PETSC_USE_LOG)
523   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
524 #endif
525   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
526   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
527   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
528   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
529 
530   ierr = PetscFree(mat->data);CHKERRQ(ierr);
531   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
532 
533   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
534   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
535 #if defined(PETSC_HAVE_ELEMENTAL)
536   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
537 #endif
538   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
539   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
540   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
541   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
542   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
543   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
544   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 
548 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
549 {
550   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
551   PetscErrorCode    ierr;
552   PetscViewerFormat format;
553   int               fd;
554   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
555   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
556   PetscScalar       *work,*v,*vv;
557   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
558 
559   PetscFunctionBegin;
560   if (mdn->size == 1) {
561     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
562   } else {
563     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
564     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
565     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
566 
567     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
568     if (format == PETSC_VIEWER_NATIVE) {
569 
570       if (!rank) {
571         /* store the matrix as a dense matrix */
572         header[0] = MAT_FILE_CLASSID;
573         header[1] = mat->rmap->N;
574         header[2] = N;
575         header[3] = MATRIX_BINARY_FORMAT_DENSE;
576         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
577 
578         /* get largest work array needed for transposing array */
579         mmax = mat->rmap->n;
580         for (i=1; i<size; i++) {
581           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
582         }
583         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
584 
585         /* write out local array, by rows */
586         m = mat->rmap->n;
587         v = a->v;
588         for (j=0; j<N; j++) {
589           for (i=0; i<m; i++) {
590             work[j + i*N] = *v++;
591           }
592         }
593         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
594         /* get largest work array to receive messages from other processes, excludes process zero */
595         mmax = 0;
596         for (i=1; i<size; i++) {
597           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
598         }
599         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
600         for (k = 1; k < size; k++) {
601           v    = vv;
602           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
603           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
604 
605           for (j = 0; j < N; j++) {
606             for (i = 0; i < m; i++) {
607               work[j + i*N] = *v++;
608             }
609           }
610           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
611         }
612         ierr = PetscFree(work);CHKERRQ(ierr);
613         ierr = PetscFree(vv);CHKERRQ(ierr);
614       } else {
615         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
616       }
617     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
623 #include <petscdraw.h>
624 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
625 {
626   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
627   PetscErrorCode    ierr;
628   PetscMPIInt       rank = mdn->rank;
629   PetscViewerType   vtype;
630   PetscBool         iascii,isdraw;
631   PetscViewer       sviewer;
632   PetscViewerFormat format;
633 
634   PetscFunctionBegin;
635   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
636   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
637   if (iascii) {
638     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
639     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
640     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
641       MatInfo info;
642       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
643       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
644       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
645       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
646       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
647       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
648       PetscFunctionReturn(0);
649     } else if (format == PETSC_VIEWER_ASCII_INFO) {
650       PetscFunctionReturn(0);
651     }
652   } else if (isdraw) {
653     PetscDraw draw;
654     PetscBool isnull;
655 
656     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
657     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
658     if (isnull) PetscFunctionReturn(0);
659   }
660 
661   {
662     /* assemble the entire matrix onto first processor. */
663     Mat         A;
664     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
665     PetscInt    *cols;
666     PetscScalar *vals;
667 
668     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
669     if (!rank) {
670       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
671     } else {
672       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
673     }
674     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
675     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
676     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
677     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
678 
679     /* Copy the matrix ... This isn't the most efficient means,
680        but it's quick for now */
681     A->insertmode = INSERT_VALUES;
682 
683     row = mat->rmap->rstart;
684     m   = mdn->A->rmap->n;
685     for (i=0; i<m; i++) {
686       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
687       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
688       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
689       row++;
690     }
691 
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
695     if (!rank) {
696       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
697       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
698     }
699     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
700     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701     ierr = MatDestroy(&A);CHKERRQ(ierr);
702   }
703   PetscFunctionReturn(0);
704 }
705 
706 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
707 {
708   PetscErrorCode ierr;
709   PetscBool      iascii,isbinary,isdraw,issocket;
710 
711   PetscFunctionBegin;
712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
714   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
715   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
716 
717   if (iascii || issocket || isdraw) {
718     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
719   } else if (isbinary) {
720     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
726 {
727   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
728   Mat            mdn  = mat->A;
729   PetscErrorCode ierr;
730   PetscReal      isend[5],irecv[5];
731 
732   PetscFunctionBegin;
733   info->block_size = 1.0;
734 
735   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
736 
737   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
738   isend[3] = info->memory;  isend[4] = info->mallocs;
739   if (flag == MAT_LOCAL) {
740     info->nz_used      = isend[0];
741     info->nz_allocated = isend[1];
742     info->nz_unneeded  = isend[2];
743     info->memory       = isend[3];
744     info->mallocs      = isend[4];
745   } else if (flag == MAT_GLOBAL_MAX) {
746     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
747 
748     info->nz_used      = irecv[0];
749     info->nz_allocated = irecv[1];
750     info->nz_unneeded  = irecv[2];
751     info->memory       = irecv[3];
752     info->mallocs      = irecv[4];
753   } else if (flag == MAT_GLOBAL_SUM) {
754     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
755 
756     info->nz_used      = irecv[0];
757     info->nz_allocated = irecv[1];
758     info->nz_unneeded  = irecv[2];
759     info->memory       = irecv[3];
760     info->mallocs      = irecv[4];
761   }
762   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
763   info->fill_ratio_needed = 0;
764   info->factor_mallocs    = 0;
765   PetscFunctionReturn(0);
766 }
767 
768 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
769 {
770   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
771   PetscErrorCode ierr;
772 
773   PetscFunctionBegin;
774   switch (op) {
775   case MAT_NEW_NONZERO_LOCATIONS:
776   case MAT_NEW_NONZERO_LOCATION_ERR:
777   case MAT_NEW_NONZERO_ALLOCATION_ERR:
778     MatCheckPreallocated(A,1);
779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
780     break;
781   case MAT_ROW_ORIENTED:
782     MatCheckPreallocated(A,1);
783     a->roworiented = flg;
784     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
785     break;
786   case MAT_NEW_DIAGONALS:
787   case MAT_KEEP_NONZERO_PATTERN:
788   case MAT_USE_HASH_TABLE:
789     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
790     break;
791   case MAT_IGNORE_OFF_PROC_ENTRIES:
792     a->donotstash = flg;
793     break;
794   case MAT_SYMMETRIC:
795   case MAT_STRUCTURALLY_SYMMETRIC:
796   case MAT_HERMITIAN:
797   case MAT_SYMMETRY_ETERNAL:
798   case MAT_IGNORE_LOWER_TRIANGULAR:
799     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
800     break;
801   default:
802     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
803   }
804   PetscFunctionReturn(0);
805 }
806 
807 
808 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
809 {
810   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
811   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
812   PetscScalar    *l,*r,x,*v;
813   PetscErrorCode ierr;
814   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
815 
816   PetscFunctionBegin;
817   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
818   if (ll) {
819     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
820     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
821     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
822     for (i=0; i<m; i++) {
823       x = l[i];
824       v = mat->v + i;
825       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
826     }
827     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
828     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
829   }
830   if (rr) {
831     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
832     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
833     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
836     for (i=0; i<n; i++) {
837       x = r[i];
838       v = mat->v + i*m;
839       for (j=0; j<m; j++) (*v++) *= x;
840     }
841     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
842     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
848 {
849   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
850   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
851   PetscErrorCode ierr;
852   PetscInt       i,j;
853   PetscReal      sum = 0.0;
854   PetscScalar    *v  = mat->v;
855 
856   PetscFunctionBegin;
857   if (mdn->size == 1) {
858     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
859   } else {
860     if (type == NORM_FROBENIUS) {
861       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
862         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
863       }
864       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
865       *nrm = PetscSqrtReal(*nrm);
866       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
867     } else if (type == NORM_1) {
868       PetscReal *tmp,*tmp2;
869       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
870       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
871       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
872       *nrm = 0.0;
873       v    = mat->v;
874       for (j=0; j<mdn->A->cmap->n; j++) {
875         for (i=0; i<mdn->A->rmap->n; i++) {
876           tmp[j] += PetscAbsScalar(*v);  v++;
877         }
878       }
879       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
880       for (j=0; j<A->cmap->N; j++) {
881         if (tmp2[j] > *nrm) *nrm = tmp2[j];
882       }
883       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
884       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
885     } else if (type == NORM_INFINITY) { /* max row norm */
886       PetscReal ntemp;
887       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
888       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
889     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
890   }
891   PetscFunctionReturn(0);
892 }
893 
894 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
895 {
896   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
897   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
898   Mat            B;
899   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
900   PetscErrorCode ierr;
901   PetscInt       j,i;
902   PetscScalar    *v;
903 
904   PetscFunctionBegin;
905   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
906   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
907     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
908     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
909     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
910     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
911   } else {
912     B = *matout;
913   }
914 
915   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
916   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
917   for (i=0; i<m; i++) rwork[i] = rstart + i;
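  /* Column j of the local block (v, column-major) holds A(rstart:rstart+m, j); insert it
     as global row j of B, using rwork (the global row numbers of A) as column indices. */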
918   for (j=0; j<n; j++) {
919     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
920     v   += m;
921   }
922   ierr = PetscFree(rwork);CHKERRQ(ierr);
923   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
924   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
925   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
926     *matout = B;
927   } else {
928     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 
934 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
935 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
936 
937 PetscErrorCode MatSetUp_MPIDense(Mat A)
938 {
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
947 {
948   PetscErrorCode ierr;
949   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
950 
951   PetscFunctionBegin;
952   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
953   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
954   PetscFunctionReturn(0);
955 }
956 
957 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
958 {
959   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   ierr = MatConjugate(a->A);CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatRealPart_MPIDense(Mat A)
968 {
969   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   ierr = MatRealPart(a->A);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
978 {
979   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
980   PetscErrorCode ierr;
981 
982   PetscFunctionBegin;
983   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
988 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
989 {
990   PetscErrorCode ierr;
991   PetscInt       i,n;
992   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
993   PetscReal      *work;
994 
995   PetscFunctionBegin;
996   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
997   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
998   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
999   if (type == NORM_2) {
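    /* square the local column norms so that summing the squares across processes and
       taking the square root afterwards yields the global 2-norms */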
1000     for (i=0; i<n; i++) work[i] *= work[i];
1001   }
1002   if (type == NORM_INFINITY) {
1003     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1004   } else {
1005     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree(work);CHKERRQ(ierr);
1008   if (type == NORM_2) {
1009     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1015 {
1016   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1017   PetscErrorCode ierr;
1018   PetscScalar    *a;
1019   PetscInt       m,n,i;
1020 
1021   PetscFunctionBegin;
1022   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1023   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1024   for (i=0; i<m*n; i++) {
1025     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1026   }
1027   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1032 
1033 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1034 {
1035   PetscFunctionBegin;
1036   *missing = PETSC_FALSE;
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 /* -------------------------------------------------------------------*/
1041 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1042                                         MatGetRow_MPIDense,
1043                                         MatRestoreRow_MPIDense,
1044                                         MatMult_MPIDense,
1045                                 /*  4*/ MatMultAdd_MPIDense,
1046                                         MatMultTranspose_MPIDense,
1047                                         MatMultTransposeAdd_MPIDense,
1048                                         0,
1049                                         0,
1050                                         0,
1051                                 /* 10*/ 0,
1052                                         0,
1053                                         0,
1054                                         0,
1055                                         MatTranspose_MPIDense,
1056                                 /* 15*/ MatGetInfo_MPIDense,
1057                                         MatEqual_MPIDense,
1058                                         MatGetDiagonal_MPIDense,
1059                                         MatDiagonalScale_MPIDense,
1060                                         MatNorm_MPIDense,
1061                                 /* 20*/ MatAssemblyBegin_MPIDense,
1062                                         MatAssemblyEnd_MPIDense,
1063                                         MatSetOption_MPIDense,
1064                                         MatZeroEntries_MPIDense,
1065                                 /* 24*/ MatZeroRows_MPIDense,
1066                                         0,
1067                                         0,
1068                                         0,
1069                                         0,
1070                                 /* 29*/ MatSetUp_MPIDense,
1071                                         0,
1072                                         0,
1073                                         MatGetDiagonalBlock_MPIDense,
1074                                         0,
1075                                 /* 34*/ MatDuplicate_MPIDense,
1076                                         0,
1077                                         0,
1078                                         0,
1079                                         0,
1080                                 /* 39*/ MatAXPY_MPIDense,
1081                                         MatCreateSubMatrices_MPIDense,
1082                                         0,
1083                                         MatGetValues_MPIDense,
1084                                         0,
1085                                 /* 44*/ 0,
1086                                         MatScale_MPIDense,
1087                                         MatShift_Basic,
1088                                         0,
1089                                         0,
1090                                 /* 49*/ MatSetRandom_MPIDense,
1091                                         0,
1092                                         0,
1093                                         0,
1094                                         0,
1095                                 /* 54*/ 0,
1096                                         0,
1097                                         0,
1098                                         0,
1099                                         0,
1100                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1101                                         MatDestroy_MPIDense,
1102                                         MatView_MPIDense,
1103                                         0,
1104                                         0,
1105                                 /* 64*/ 0,
1106                                         0,
1107                                         0,
1108                                         0,
1109                                         0,
1110                                 /* 69*/ 0,
1111                                         0,
1112                                         0,
1113                                         0,
1114                                         0,
1115                                 /* 74*/ 0,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                 /* 79*/ 0,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                 /* 83*/ MatLoad_MPIDense,
1125                                         0,
1126                                         0,
1127                                         0,
1128                                         0,
1129                                         0,
1130 #if defined(PETSC_HAVE_ELEMENTAL)
1131                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1132                                         MatMatMultSymbolic_MPIDense_MPIDense,
1133 #else
1134                                 /* 89*/ 0,
1135                                         0,
1136 #endif
1137                                         MatMatMultNumeric_MPIDense,
1138                                         0,
1139                                         0,
1140                                 /* 94*/ 0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                 /* 99*/ 0,
1146                                         0,
1147                                         0,
1148                                         MatConjugate_MPIDense,
1149                                         0,
1150                                 /*104*/ 0,
1151                                         MatRealPart_MPIDense,
1152                                         MatImaginaryPart_MPIDense,
1153                                         0,
1154                                         0,
1155                                 /*109*/ 0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                         MatMissingDiagonal_MPIDense,
1160                                 /*114*/ 0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                 /*119*/ 0,
1166                                         0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                 /*124*/ 0,
1171                                         MatGetColumnNorms_MPIDense,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                 /*129*/ 0,
1176                                         MatTransposeMatMult_MPIDense_MPIDense,
1177                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1178                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1179                                         0,
1180                                 /*134*/ 0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /*139*/ 0,
1186                                         0,
1187                                         0
1188 };
1189 
1190 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1191 {
1192   Mat_MPIDense   *a;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196   mat->preallocated = PETSC_TRUE;
1197   /* Note: for now, when a data pointer is provided, we assume the user has correctly
1198      allocated the local dense storage space.  We should add error checking. */
1199 
1200   a       = (Mat_MPIDense*)mat->data;
1201   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1202   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1203   a->nvec = mat->cmap->n;
1204 
1205   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1206   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1207   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1208   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1209   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #if defined(PETSC_HAVE_ELEMENTAL)
1214 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1215 {
1216   Mat            mat_elemental;
1217   PetscErrorCode ierr;
1218   PetscScalar    *v;
1219   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1220 
1221   PetscFunctionBegin;
1222   if (reuse == MAT_REUSE_MATRIX) {
1223     mat_elemental = *newmat;
1224     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1225   } else {
1226     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1227     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1228     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1229     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1230     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1231   }
1232 
1233   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1234   for (i=0; i<N; i++) cols[i] = i;
1235   for (i=0; i<m; i++) rows[i] = rstart + i;
1236 
1237   /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
1238   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1239   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1240   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1241   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1242   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1243   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1244 
1245   if (reuse == MAT_INPLACE_MATRIX) {
1246     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1247   } else {
1248     *newmat = mat_elemental;
1249   }
1250   PetscFunctionReturn(0);
1251 }
1252 #endif
1253 
1254 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1255 {
1256   Mat_MPIDense   *a;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1261   mat->data = (void*)a;
1262   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1263 
1264   mat->insertmode = NOT_SET_VALUES;
1265   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1266   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1267 
1268   /* build cache for off-process entries formed during MatSetValues() */
1269   a->donotstash = PETSC_FALSE;
1270 
1271   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1272 
1273   /* stuff used for matrix vector multiply */
1274   a->lvec        = 0;
1275   a->Mvctx       = 0;
1276   a->roworiented = PETSC_TRUE;
1277 
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1280 #if defined(PETSC_HAVE_ELEMENTAL)
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1287 
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1291   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 /*MC
1296    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1297 
1298    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1299    and MATMPIDENSE otherwise.
1300 
1301    Options Database Keys:
1302 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1303 
1304   Level: beginner
1305 
1306 
1307 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1308 M*/
1309 
1310 /*@C
1311    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1312 
1313    Not collective
1314 
1315    Input Parameters:
1316 +  B - the matrix
1317 -  data - optional location of matrix data.  Set data=NULL for PETSc
1318    to control all matrix memory allocation.
1319 
1320    Notes:
1321    The dense format is fully compatible with standard Fortran 77
1322    storage by columns.
1323 
1324    The data input variable is intended primarily for Fortran programmers
1325    who wish to allocate their own matrix memory space.  Most users should
1326    set data=NULL.
1327 
1328    Level: intermediate
1329 
1330 .keywords: matrix, dense, parallel
1331 
1332 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1333 @*/
1334 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1335 {
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 /*@C
1344    MatCreateDense - Creates a matrix in dense format (MATSEQDENSE on a single process, MATMPIDENSE otherwise).
1345 
1346    Collective on MPI_Comm
1347 
1348    Input Parameters:
1349 +  comm - MPI communicator
1350 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1351 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1352 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1353 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1354 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1355    to control all matrix memory allocation.
1356 
1357    Output Parameter:
1358 .  A - the matrix
1359 
1360    Notes:
1361    The dense format is fully compatible with standard Fortran 77
1362    storage by columns.
1363 
1364    The data input variable is intended primarily for Fortran programmers
1365    who wish to allocate their own matrix memory space.  Most users should
1366    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1367 
1368    The user MUST specify either the local or global matrix dimensions
1369    (possibly both).
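
   A minimal sketch of typical use (error checking omitted; the global size 100 is illustrative):
.vb
   Mat         A;
   PetscInt    i,rstart,rend;
   PetscScalar one = 1.0;

   MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
   MatGetOwnershipRange(A,&rstart,&rend);
   for (i=rstart; i<rend; i++) MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve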
1370 
1371    Level: intermediate
1372 
1373 .keywords: matrix, dense, parallel
1374 
1375 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1376 @*/
1377 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1378 {
1379   PetscErrorCode ierr;
1380   PetscMPIInt    size;
1381 
1382   PetscFunctionBegin;
1383   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1384   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1385   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1386   if (size > 1) {
1387     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1388     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1389     if (data) {  /* user provided data array, so no need to assemble */
1390       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1391       (*A)->assembled = PETSC_TRUE;
1392     }
1393   } else {
1394     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1395     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1396   }
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1401 {
1402   Mat            mat;
1403   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1404   PetscErrorCode ierr;
1405 
1406   PetscFunctionBegin;
1407   *newmat = 0;
1408   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1409   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1410   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1411   a       = (Mat_MPIDense*)mat->data;
1412   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1413 
1414   mat->factortype   = A->factortype;
1415   mat->assembled    = PETSC_TRUE;
1416   mat->preallocated = PETSC_TRUE;
1417 
1418   a->size         = oldmat->size;
1419   a->rank         = oldmat->rank;
1420   mat->insertmode = NOT_SET_VALUES;
1421   a->nvec         = oldmat->nvec;
1422   a->donotstash   = oldmat->donotstash;
1423 
1424   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1425   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1426 
1427   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1428   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1429   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1430 
1431   *newmat = mat;
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1436 {
1437   PetscErrorCode ierr;
1438   PetscMPIInt    rank,size;
1439   const PetscInt *rowners;
1440   PetscInt       i,m,n,nz,j,mMax;
1441   PetscScalar    *array,*vals,*vals_ptr;
1442   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1443 
1444   PetscFunctionBegin;
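  /*
     Rank 0 reads the values for each rank's block of rows (row by row, as stored in the file)
     and ships them with MPIULong_Send(); every other rank receives its block with MPIULong_Recv()
     and copies it into the column-major local dense array.
  */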
1445   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1446   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1447 
1448   /* determine ownership of rows and columns */
1449   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1450   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1451 
1452   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1453   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1454     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1455   }
1456   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1457   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1458   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1459   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1460   if (!rank) {
1461     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1462 
1463     /* read in my part of the matrix numerical values  */
1464     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1465 
1466     /* insert into the matrix row by row: the file stores values by row while the local dense array is column-major, so we cannot read directly into array */
1467     vals_ptr = vals;
1468     for (i=0; i<m; i++) {
1469       for (j=0; j<N; j++) {
1470         array[i + j*m] = *vals_ptr++;
1471       }
1472     }
1473 
1474     /* read in the other processors' parts and ship them out */
1475     for (i=1; i<size; i++) {
1476       nz   = (rowners[i+1] - rowners[i])*N;
1477       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1478       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1479     }
1480   } else {
1481     /* receive numeric values */
1482     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1483 
1484     /* receive message of values */
1485     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1486 
1487     /* insert into the matrix row by row: the received values are ordered by row while the local dense array is column-major */
1488     vals_ptr = vals;
1489     for (i=0; i<m; i++) {
1490       for (j=0; j<N; j++) {
1491         array[i + j*m] = *vals_ptr++;
1492       }
1493     }
1494   }
1495   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1496   ierr = PetscFree(vals);CHKERRQ(ierr);
1497   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1498   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1503 {
1504   Mat_MPIDense   *a;
1505   PetscScalar    *vals,*svals;
1506   MPI_Comm       comm;
1507   MPI_Status     status;
1508   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1509   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1510   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1511   PetscInt       i,nz,j,rstart,rend;
1512   int            fd;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
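  /*
     Overall flow: rank 0 reads the binary header and, for the sparse on-disk format, the row lengths,
     column indices, and numerical values, shipping each rank the portion for its rows; every rank then
     inserts its values with MatSetValues() and assembles.  A dense on-disk format is detected from the
     header and handled separately by MatLoad_MPIDense_DenseInFile().
  */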
1516   /* force binary viewer to load .info file if it has not yet done so */
1517   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1518   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1520   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1521   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1522   if (!rank) {
1523     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1524     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1525   }
1526   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1527   M    = header[1]; N = header[2]; nz = header[3];
1528 
1529   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1530   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1531   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1532 
1533   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1534   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1535 
1536   /*
1537        Handle case where matrix is stored on disk as a dense matrix
1538   */
1539   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1540     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1541     PetscFunctionReturn(0);
1542   }
1543 
1544   /* determine ownership of all rows */
1545   if (newmat->rmap->n < 0) {
1546     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1547   } else {
1548     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1549   }
1550   if (newmat->cmap->n < 0) {
1551     n = PETSC_DECIDE;
1552   } else {
1553     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1554   }
1555 
1556   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1557   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1558   rowners[0] = 0;
1559   for (i=2; i<=size; i++) {
1560     rowners[i] += rowners[i-1];
1561   }
1562   rstart = rowners[rank];
1563   rend   = rowners[rank+1];
1564 
1565   /* distribute row lengths to all processors */
1566   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1567   if (!rank) {
1568     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1569     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1570     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1571     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1572     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1573     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1574   } else {
1575     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1576   }
1577 
1578   if (!rank) {
1579     /* calculate the number of nonzeros on each processor */
1580     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1581     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1582     for (i=0; i<size; i++) {
1583       for (j=rowners[i]; j< rowners[i+1]; j++) {
1584         procsnz[i] += rowlengths[j];
1585       }
1586     }
1587     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1588 
1589     /* determine max buffer needed and allocate it */
1590     maxnz = 0;
1591     for (i=0; i<size; i++) {
1592       maxnz = PetscMax(maxnz,procsnz[i]);
1593     }
1594     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1595 
1596     /* read in my part of the matrix column indices  */
1597     nz   = procsnz[0];
1598     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1599     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1600 
1601     /* read in everyone else's part and ship it off */
1602     for (i=1; i<size; i++) {
1603       nz   = procsnz[i];
1604       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1605       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1606     }
1607     ierr = PetscFree(cols);CHKERRQ(ierr);
1608   } else {
1609     /* determine buffer space needed for message */
1610     nz = 0;
1611     for (i=0; i<m; i++) {
1612       nz += ourlens[i];
1613     }
1614     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1615 
1616     /* receive message of column indices */
1617     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1618     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1619     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1620   }
1621 
1622   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1623   a = (Mat_MPIDense*)newmat->data;
1624   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1625     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1626   }
1627 
1628   if (!rank) {
1629     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1630 
1631     /* read in my part of the matrix numerical values  */
1632     nz   = procsnz[0];
1633     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1634 
1635     /* insert into matrix */
1636     jj      = rstart;
1637     smycols = mycols;
1638     svals   = vals;
1639     for (i=0; i<m; i++) {
1640       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1641       smycols += ourlens[i];
1642       svals   += ourlens[i];
1643       jj++;
1644     }
1645 
1646     /* read in the other processors' parts and ship them out */
1647     for (i=1; i<size; i++) {
1648       nz   = procsnz[i];
1649       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1650       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1651     }
1652     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1653   } else {
1654     /* receive numeric values */
1655     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1656 
1657     /* receive message of values */
1658     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1659     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1660     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1661 
1662     /* insert into matrix */
1663     jj      = rstart;
1664     smycols = mycols;
1665     svals   = vals;
1666     for (i=0; i<m; i++) {
1667       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1668       smycols += ourlens[i];
1669       svals   += ourlens[i];
1670       jj++;
1671     }
1672   }
1673   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1674   ierr = PetscFree(vals);CHKERRQ(ierr);
1675   ierr = PetscFree(mycols);CHKERRQ(ierr);
1676   ierr = PetscFree(rowners);CHKERRQ(ierr);
1677 
1678   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1684 {
1685   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1686   Mat            a,b;
1687   PetscBool      flg;
1688   PetscErrorCode ierr;
1689 
1690   PetscFunctionBegin;
1691   a    = matA->A;
1692   b    = matB->A;
1693   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1694   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1699 {
1700   PetscErrorCode        ierr;
1701   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1702   Mat_TransMatMultDense *atb = a->atbdense;
1703 
1704   PetscFunctionBegin;
1705   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1706   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1707   ierr = PetscFree(atb);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1712 {
1713   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1714   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1715   Mat_TransMatMultDense *atb = c->atbdense;
1716   PetscErrorCode ierr;
1717   MPI_Comm       comm;
1718   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1719   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1720   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1721   PetscScalar    _DOne=1.0,_DZero=0.0;
1722   PetscBLASInt   am,an,bn,aN;
1723   const PetscInt *ranges;
1724 
1725   PetscFunctionBegin;
1726   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1727   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1728   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1729 
1730   /* compute atbarray = aseq^T * bseq */
1731   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1732   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1733   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1734   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1735   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1736 
1737   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1738   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1739 
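  /*
     atbarray holds this rank's contribution A_loc^T * B_loc to the full cM x cN product, stored
     column-major with leading dimension cM (= aN).  Below it is packed so that the entries destined
     for each rank (the rows of C that rank owns, still column-major within the block) are contiguous;
     MPI_Reduce_scatter() then sums the contributions over all ranks and delivers recvcounts[i]
     entries to rank i.
  */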
1740   /* arrange atbarray into sendbuf */
1741   k = 0;
1742   for (proc=0; proc<size; proc++) {
1743     for (j=0; j<cN; j++) {
1744       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1745     }
1746   }
1747   /* reduce-scatter: sum the contributions from all ranks into the local rows of C */
1748   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1749   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1750   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1755 {
1756   PetscErrorCode        ierr;
1757   Mat                   Cdense;
1758   MPI_Comm              comm;
1759   PetscMPIInt           size;
1760   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1761   Mat_MPIDense          *c;
1762   Mat_TransMatMultDense *atb;
1763 
1764   PetscFunctionBegin;
1765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1766   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1767     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1768   }
1769 
1770   /* create matrix product Cdense */
1771   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1772   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1773   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1774   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1775   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1776   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1777   *C   = Cdense;
1778 
1779   /* create data structure so Cdense can be reused by the numeric phase */
1780   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1781   ierr = PetscNew(&atb);CHKERRQ(ierr);
1782   cM = Cdense->rmap->N;
1783   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
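  /* sendbuf and atbarray each hold a full cM-by-cN copy of the product on every rank,
     to be summed and distributed by MPI_Reduce_scatter() in the numeric phase */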
1784 
1785   c                    = (Mat_MPIDense*)Cdense->data;
1786   c->atbdense          = atb;
1787   atb->destroy         = Cdense->ops->destroy;
1788   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1793 {
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   if (scall == MAT_INITIAL_MATRIX) {
1798     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1799   }
1800   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1805 {
1806   PetscErrorCode   ierr;
1807   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1808   Mat_MatMultDense *ab = a->abdense;
1809 
1810   PetscFunctionBegin;
1811   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1812   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1813   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1814 
1815   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1816   ierr = PetscFree(ab);CHKERRQ(ierr);
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 #if defined(PETSC_HAVE_ELEMENTAL)
1821 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1822 {
1823   PetscErrorCode   ierr;
1824   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1825   Mat_MatMultDense *ab=c->abdense;
1826 
1827   PetscFunctionBegin;
1828   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1829   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1830   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1831   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1836 {
1837   PetscErrorCode   ierr;
1838   Mat              Ae,Be,Ce;
1839   Mat_MPIDense     *c;
1840   Mat_MatMultDense *ab;
1841 
1842   PetscFunctionBegin;
1843   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1844     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D,%D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1845   }
1846 
1847   /* convert A and B to Elemental matrices Ae and Be */
1848   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1849   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1850 
1851   /* Ce = Ae*Be */
1852   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1853   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1854 
1855   /* convert Ce to C */
1856   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1857 
1858   /* create data structure so C can be reused by the numeric phase */
1859   ierr = PetscNew(&ab);CHKERRQ(ierr);
1860   c                  = (Mat_MPIDense*)(*C)->data;
1861   c->abdense         = ab;
1862 
1863   ab->Ae             = Ae;
1864   ab->Be             = Be;
1865   ab->Ce             = Ce;
1866   ab->destroy        = (*C)->ops->destroy;
1867   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1868   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1873 {
1874   PetscErrorCode ierr;
1875 
1876   PetscFunctionBegin;
1877   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
1878     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1879     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1880     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1881   } else {
1882     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1883     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1884     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 #endif
1889