xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision a05bf03e2ccda28965eba84b188c3be8ae57245c)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix
14               that holds this process's local rows; for a MATSEQDENSE matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
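/*
   Usage sketch (illustrative only; error checking abbreviated). For a MATMPIDENSE matrix A
   the returned matrix has the local row count and the full global column count, and no new
   reference is taken, so the caller must not destroy it:

     Mat      Aloc;
     PetscInt m,n;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);
*/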
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows supported");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62 {
63   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
66   PetscScalar    *array;
67   MPI_Comm       comm;
68   Mat            B;
69 
70   PetscFunctionBegin;
71   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72 
73   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
74   if (!B) {
75     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
76     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
77     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
78     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
79     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
80     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
81     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
82     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
84     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
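    /* PetscObjectCompose() takes its own reference to B, so the MatDestroy() below only releases this local handle */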
85     *a   = B;
86     ierr = MatDestroy(&B);CHKERRQ(ierr);
87   } else *a = B;
88   PetscFunctionReturn(0);
89 }
90 
91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92 {
93   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96   PetscBool      roworiented = A->roworiented;
97 
98   PetscFunctionBegin;
99   for (i=0; i<m; i++) {
100     if (idxm[i] < 0) continue;
101     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102     if (idxm[i] >= rstart && idxm[i] < rend) {
103       row = idxm[i] - rstart;
104       if (roworiented) {
105         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
106       } else {
107         for (j=0; j<n; j++) {
108           if (idxn[j] < 0) continue;
109           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
111         }
112       }
113     } else if (!A->donotstash) {
114       mat->assembled = PETSC_FALSE;
115       if (roworiented) {
116         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117       } else {
118         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
119       }
120     }
121   }
122   PetscFunctionReturn(0);
123 }
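/*
   Caller-side sketch (illustrative only; grow, gcol and val are hypothetical caller
   variables) of how the stashing above is exercised: any process may set any global
   entry, and rows it does not own are stashed and communicated during assembly.

     ierr = MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/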
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
178 {
179   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
180   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
181   PetscErrorCode ierr;
182   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
183   const PetscInt *irow,*icol;
184   PetscScalar    *av,*bv,*v = lmat->v;
185   Mat            newmat;
186   IS             iscol_local;
187 
188   PetscFunctionBegin;
189   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
190   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
191   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
192   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
193   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
194   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
195 
196   /* No parallel redistribution is currently supported! We should really check each index set
197      to confirm that it is OK. Currently this supports only submatrices with the same row
198      partitioning as the original matrix! */
199 
200   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
201   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
202 
203   /* Check submatrix call */
204   if (scall == MAT_REUSE_MATRIX) {
205     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
206     /* Really should check that the reused matrix has the correct row and column sizes! */
207     newmat = *B;
208   } else {
209     /* Create and fill new matrix */
210     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
211     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
212     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
213     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
214   }
215 
216   /* Now extract the data pointers and do the copy, column at a time */
217   newmatd = (Mat_MPIDense*)newmat->data;
218   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
219 
220   for (i=0; i<Ncols; i++) {
221     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
222     for (j=0; j<nrows; j++) {
223       *bv++ = av[irow[j] - rstart];
224     }
225   }
226 
227   /* Assemble the matrices so that the correct flags are set */
228   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
229   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230 
231   /* Free work space */
232   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
233   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
234   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
235   *B   = newmat;
236   PetscFunctionReturn(0);
237 }
238 
239 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
240 {
241   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
242   PetscErrorCode ierr;
243 
244   PetscFunctionBegin;
245   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
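/*
   Usage sketch (illustrative only): direct access to the raw column-major local storage.
   The local block has mat->rmap->n rows and mat->cmap->N columns (see
   MatMPIDenseSetPreallocation_MPIDense below); lda is the local leading dimension,
   equal to the local row count unless it was changed.

     PetscScalar *array;
     ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
     ... array[i + j*lda] is local row i of global column j ...
     ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
*/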
248 
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
259   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode; the bitwise OR equals (ADD_VALUES|INSERT_VALUES) only when the modes are mixed */
260   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
271 {
272   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
273   PetscErrorCode ierr;
274   PetscInt       i,*row,*col,flg,j,rstart,ncols;
275   PetscMPIInt    n;
276   PetscScalar    *val;
277 
278   PetscFunctionBegin;
279   /*  wait on receives */
280   while (1) {
281     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
282     if (!flg) break;
283 
284     for (i=0; i<n;) {
285       /* Now identify the consecutive vals belonging to the same row */
286       for (j=i,rstart=row[j]; j<n; j++) {
287         if (row[j] != rstart) break;
288       }
289       if (j < n) ncols = j-i;
290       else       ncols = n-i;
291       /* Now assemble all these values with a single function call */
292       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
293       i    = j;
294     }
295   }
296   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
297 
298   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
299   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
300 
301   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
302     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
303   }
304   PetscFunctionReturn(0);
305 }
306 
307 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
308 {
309   PetscErrorCode ierr;
310   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
311 
312   PetscFunctionBegin;
313   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 /* the code does not do the diagonal entries correctly unless the
318    matrix is square and the column and row ownerships are identical.
319    This is a BUG. The only way to fix it seems to be to access
320    the local matrix directly and not go through the MatZeroRows()
321    routine.
322 */
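/* Overview of the communication in MatZeroRows_MPIDense below (descriptive only):
     1) count how many of the requested global rows belong to each process;
     2) exchange the message counts and maximum length (PetscMaxSum) and post the
        nonblocking receives;
     3) send each process the list of its rows that are to be zeroed;
     4) convert the received global rows to local indices, fix the right-hand side if
        requested, zero the local rows, and place diag on the diagonal;
     5) wait on the outstanding sends before returning.
*/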
323 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
324 {
325   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
326   PetscErrorCode    ierr;
327   PetscInt          i,*owners = A->rmap->range;
328   PetscInt          *sizes,j,idx,nsends;
329   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
330   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
331   PetscInt          *lens,*lrows,*values;
332   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
333   MPI_Comm          comm;
334   MPI_Request       *send_waits,*recv_waits;
335   MPI_Status        recv_status,*send_status;
336   PetscBool         found;
337   const PetscScalar *xx;
338   PetscScalar       *bb;
339 
340   PetscFunctionBegin;
341   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
342   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
343   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
344   /*  first count number of contributors to each processor */
345   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
346   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
347   for (i=0; i<N; i++) {
348     idx   = rows[i];
349     found = PETSC_FALSE;
350     for (j=0; j<size; j++) {
351       if (idx >= owners[j] && idx < owners[j+1]) {
352         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
353       }
354     }
355     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
356   }
357   nsends = 0;
358   for (i=0; i<size; i++) nsends += sizes[2*i+1];
359 
360   /* inform other processors of number of messages and max length*/
361   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
362 
363   /* post receives:   */
364   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
365   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
366   for (i=0; i<nrecvs; i++) {
367     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
368   }
369 
370   /* do sends:
371       1) starts[i] gives the starting index in svalues for stuff going to
372          the ith processor
373   */
374   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
375   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
376   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
377 
378   starts[0] = 0;
379   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
380   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
381 
382   starts[0] = 0;
383   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
384   count = 0;
385   for (i=0; i<size; i++) {
386     if (sizes[2*i+1]) {
387       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
388     }
389   }
390   ierr = PetscFree(starts);CHKERRQ(ierr);
391 
392   base = owners[rank];
393 
394   /*  wait on receives */
395   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
396   count = nrecvs;
397   slen  = 0;
398   while (count) {
399     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
400     /* unpack receives into our local space */
401     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
402 
403     source[imdex] = recv_status.MPI_SOURCE;
404     lens[imdex]   = n;
405     slen += n;
406     count--;
407   }
408   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
409 
410   /* move the data into the send scatter */
411   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
412   count = 0;
413   for (i=0; i<nrecvs; i++) {
414     values = rvalues + i*nmax;
415     for (j=0; j<lens[i]; j++) {
416       lrows[count++] = values[j] - base;
417     }
418   }
419   ierr = PetscFree(rvalues);CHKERRQ(ierr);
420   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
421   ierr = PetscFree(owner);CHKERRQ(ierr);
422   ierr = PetscFree(sizes);CHKERRQ(ierr);
423 
424   /* fix right hand side if needed */
425   if (x && b) {
426     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
427     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
428     for (i=0; i<slen; i++) {
429       bb[lrows[i]] = diag*xx[lrows[i]];
430     }
431     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
432     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
433   }
434 
435   /* actually zap the local rows */
436   ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
437   if (diag != 0.0) {
438     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
439     PetscInt     m   = ll->lda, i;
440 
441     for (i=0; i<slen; i++) {
442       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
443     }
444   }
445   ierr = PetscFree(lrows);CHKERRQ(ierr);
446 
447   /* wait on sends */
448   if (nsends) {
449     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
450     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
451     ierr = PetscFree(send_status);CHKERRQ(ierr);
452   }
453   ierr = PetscFree(send_waits);CHKERRQ(ierr);
454   ierr = PetscFree(svalues);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
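/*
   Usage sketch (illustrative only; rows and nrows are hypothetical caller data): zero a
   set of global rows, put 2.0 on their diagonal entries, and update the right-hand side
   b so that the solution retains the values of x in those rows.

     ierr = MatZeroRows(A,nrows,rows,2.0,x,b);CHKERRQ(ierr);
*/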
457 
458 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
460 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
461 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
462 
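/* The parallel matrix-vector products below all follow the same pattern: gather the needed
   global vector entries into the local work vector lvec with the Mvctx scatter and delegate
   to the corresponding MATSEQDENSE kernel; the transpose variants apply the local kernel
   first and then scatter in reverse, accumulating with ADD_VALUES. */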
463 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
464 {
465   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
466   PetscErrorCode ierr;
467 
468   PetscFunctionBegin;
469   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
476 {
477   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
482   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
483   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
488 {
489   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
490   PetscErrorCode ierr;
491   PetscScalar    zero = 0.0;
492 
493   PetscFunctionBegin;
494   ierr = VecSet(yy,zero);CHKERRQ(ierr);
495   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
496   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
497   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
502 {
503   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
504   PetscErrorCode ierr;
505 
506   PetscFunctionBegin;
507   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
508   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
509   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
515 {
516   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
517   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
518   PetscErrorCode ierr;
519   PetscInt       len,i,n,m = A->rmap->n,radd;
520   PetscScalar    *x,zero = 0.0;
521 
522   PetscFunctionBegin;
523   ierr = VecSet(v,zero);CHKERRQ(ierr);
524   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
525   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
526   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
527   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
528   radd = A->rmap->rstart*m;
529   for (i=0; i<len; i++) {
530     x[i] = aloc->v[radd + i*m + i];
531   }
532   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatDestroy_MPIDense(Mat mat)
537 {
538   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
539   PetscErrorCode ierr;
540 
541   PetscFunctionBegin;
542 #if defined(PETSC_USE_LOG)
543   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
544 #endif
545   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
546   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
547   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
548   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
549 
550   ierr = PetscFree(mat->data);CHKERRQ(ierr);
551   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
552 
553   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
554   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
555   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
556 
557   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
558 #if defined(PETSC_HAVE_ELEMENTAL)
559   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
560 #endif
561   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
568   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
574 {
575   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
576   PetscErrorCode    ierr;
577   PetscViewerFormat format;
578   int               fd;
579   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
580   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
581   PetscScalar       *work,*v,*vv;
582   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
583 
584   PetscFunctionBegin;
585   if (mdn->size == 1) {
586     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
587   } else {
588     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
589     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
590     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
591 
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_NATIVE) {
594 
595       if (!rank) {
596         /* store the matrix as a dense matrix */
597         header[0] = MAT_FILE_CLASSID;
598         header[1] = mat->rmap->N;
599         header[2] = N;
600         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
602 
603         /* get the largest work array needed for transposing the column-major array into row-major order */
604         mmax = mat->rmap->n;
605         for (i=1; i<size; i++) {
606           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607         }
608         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
609 
610         /* write out local array, by rows */
611         m = mat->rmap->n;
612         v = a->v;
613         for (j=0; j<N; j++) {
614           for (i=0; i<m; i++) {
615             work[j + i*N] = *v++;
616           }
617         }
618         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
619         /* get largest work array to receive messages from other processes, excludes process zero */
620         mmax = 0;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
625         for (k = 1; k < size; k++) {
626           v    = vv;
627           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
629 
630           for (j = 0; j < N; j++) {
631             for (i = 0; i < m; i++) {
632               work[j + i*N] = *v++;
633             }
634           }
635           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         }
637         ierr = PetscFree(work);CHKERRQ(ierr);
638         ierr = PetscFree(vv);CHKERRQ(ierr);
639       } else {
640         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
641       }
642     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
643   }
644   PetscFunctionReturn(0);
645 }
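/*
   Usage sketch (illustrative only; "matrix.dat" is a hypothetical file name): as the error
   message above indicates, storing a parallel dense matrix in binary requires the native
   format.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/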
646 
647 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
648 #include <petscdraw.h>
649 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
650 {
651   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
652   PetscErrorCode    ierr;
653   PetscMPIInt       rank = mdn->rank;
654   PetscViewerType   vtype;
655   PetscBool         iascii,isdraw;
656   PetscViewer       sviewer;
657   PetscViewerFormat format;
658 
659   PetscFunctionBegin;
660   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
661   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
662   if (iascii) {
663     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
664     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
665     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
666       MatInfo info;
667       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
668       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
669       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
670       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
671       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
672       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
673       PetscFunctionReturn(0);
674     } else if (format == PETSC_VIEWER_ASCII_INFO) {
675       PetscFunctionReturn(0);
676     }
677   } else if (isdraw) {
678     PetscDraw draw;
679     PetscBool isnull;
680 
681     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
682     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
683     if (isnull) PetscFunctionReturn(0);
684   }
685 
686   {
687     /* assemble the entire matrix onto first processor. */
688     Mat         A;
689     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
690     PetscInt    *cols;
691     PetscScalar *vals;
692 
693     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
694     if (!rank) {
695       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
696     } else {
697       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
698     }
699     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
700     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
701     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
702     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
703 
704     /* Copy the matrix ... This isn't the most efficient means,
705        but it's quick for now */
706     A->insertmode = INSERT_VALUES;
707 
708     row = mat->rmap->rstart;
709     m   = mdn->A->rmap->n;
710     for (i=0; i<m; i++) {
711       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
712       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
713       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
714       row++;
715     }
716 
717     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
718     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
719     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
722       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
723     }
724     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
725     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
726     ierr = MatDestroy(&A);CHKERRQ(ierr);
727   }
728   PetscFunctionReturn(0);
729 }
730 
731 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
732 {
733   PetscErrorCode ierr;
734   PetscBool      iascii,isbinary,isdraw,issocket;
735 
736   PetscFunctionBegin;
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
741 
742   if (iascii || issocket || isdraw) {
743     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
744   } else if (isbinary) {
745     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
751 {
752   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
753   Mat            mdn  = mat->A;
754   PetscErrorCode ierr;
755   PetscReal      isend[5],irecv[5];
756 
757   PetscFunctionBegin;
758   info->block_size = 1.0;
759 
760   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
761 
762   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
763   isend[3] = info->memory;  isend[4] = info->mallocs;
764   if (flag == MAT_LOCAL) {
765     info->nz_used      = isend[0];
766     info->nz_allocated = isend[1];
767     info->nz_unneeded  = isend[2];
768     info->memory       = isend[3];
769     info->mallocs      = isend[4];
770   } else if (flag == MAT_GLOBAL_MAX) {
771     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
772 
773     info->nz_used      = irecv[0];
774     info->nz_allocated = irecv[1];
775     info->nz_unneeded  = irecv[2];
776     info->memory       = irecv[3];
777     info->mallocs      = irecv[4];
778   } else if (flag == MAT_GLOBAL_SUM) {
779     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
780 
781     info->nz_used      = irecv[0];
782     info->nz_allocated = irecv[1];
783     info->nz_unneeded  = irecv[2];
784     info->memory       = irecv[3];
785     info->mallocs      = irecv[4];
786   }
787   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
788   info->fill_ratio_needed = 0;
789   info->factor_mallocs    = 0;
790   PetscFunctionReturn(0);
791 }
792 
793 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
794 {
795   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
796   PetscErrorCode ierr;
797 
798   PetscFunctionBegin;
799   switch (op) {
800   case MAT_NEW_NONZERO_LOCATIONS:
801   case MAT_NEW_NONZERO_LOCATION_ERR:
802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
803     MatCheckPreallocated(A,1);
804     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
805     break;
806   case MAT_ROW_ORIENTED:
807     MatCheckPreallocated(A,1);
808     a->roworiented = flg;
809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
810     break;
811   case MAT_NEW_DIAGONALS:
812   case MAT_KEEP_NONZERO_PATTERN:
813   case MAT_USE_HASH_TABLE:
814     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
815     break;
816   case MAT_IGNORE_OFF_PROC_ENTRIES:
817     a->donotstash = flg;
818     break;
819   case MAT_SYMMETRIC:
820   case MAT_STRUCTURALLY_SYMMETRIC:
821   case MAT_HERMITIAN:
822   case MAT_SYMMETRY_ETERNAL:
823   case MAT_IGNORE_LOWER_TRIANGULAR:
824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
825     break;
826   default:
827     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 
833 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
834 {
835   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
836   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
837   const PetscScalar *l,*r;
838   PetscScalar       x,*v;
839   PetscErrorCode    ierr;
840   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
841 
842   PetscFunctionBegin;
843   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
844   if (ll) {
845     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
846     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
847     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
848     for (i=0; i<m; i++) {
849       x = l[i];
850       v = mat->v + i;
851       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
852     }
853     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
854     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
855   }
856   if (rr) {
857     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
858     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
859     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
862     for (i=0; i<n; i++) {
863       x = r[i];
864       v = mat->v + i*m;
865       for (j=0; j<m; j++) (*v++) *= x;
866     }
867     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
868     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
869   }
870   PetscFunctionReturn(0);
871 }
872 
873 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
874 {
875   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
876   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
877   PetscErrorCode ierr;
878   PetscInt       i,j;
879   PetscReal      sum = 0.0;
880   PetscScalar    *v  = mat->v;
881 
882   PetscFunctionBegin;
883   if (mdn->size == 1) {
884     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
885   } else {
886     if (type == NORM_FROBENIUS) {
887       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
888         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
889       }
890       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
891       *nrm = PetscSqrtReal(*nrm);
892       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
893     } else if (type == NORM_1) {
894       PetscReal *tmp,*tmp2;
895       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
896       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
897       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
898       *nrm = 0.0;
899       v    = mat->v;
900       for (j=0; j<mdn->A->cmap->n; j++) {
901         for (i=0; i<mdn->A->rmap->n; i++) {
902           tmp[j] += PetscAbsScalar(*v);  v++;
903         }
904       }
905       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
906       for (j=0; j<A->cmap->N; j++) {
907         if (tmp2[j] > *nrm) *nrm = tmp2[j];
908       }
909       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
910       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
911     } else if (type == NORM_INFINITY) { /* max row norm */
912       PetscReal ntemp;
913       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
914       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
915     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for the two norm");
916   }
917   PetscFunctionReturn(0);
918 }
919 
920 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
921 {
922   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
923   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
924   Mat            B;
925   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
926   PetscErrorCode ierr;
927   PetscInt       j,i;
928   PetscScalar    *v;
929 
930   PetscFunctionBegin;
931   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
932   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
933     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
934     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
935     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
936     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
937   } else {
938     B = *matout;
939   }
940 
941   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
942   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
943   for (i=0; i<m; i++) rwork[i] = rstart + i;
944   for (j=0; j<n; j++) {
945     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
946     v   += m;
947   }
948   ierr = PetscFree(rwork);CHKERRQ(ierr);
949   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
950   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
951   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
952     *matout = B;
953   } else {
954     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
955   }
956   PetscFunctionReturn(0);
957 }
958 
959 
960 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
961 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
962 
963 PetscErrorCode MatSetUp_MPIDense(Mat A)
964 {
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
973 {
974   PetscErrorCode ierr;
975   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
976 
977   PetscFunctionBegin;
978   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
979   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
984 {
985   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = MatConjugate(a->A);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatRealPart_MPIDense(Mat A)
994 {
995   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
996   PetscErrorCode ierr;
997 
998   PetscFunctionBegin;
999   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1014 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1015 {
1016   PetscErrorCode ierr;
1017   PetscInt       i,n;
1018   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1019   PetscReal      *work;
1020 
1021   PetscFunctionBegin;
1022   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1024   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1025   if (type == NORM_2) {
1026     for (i=0; i<n; i++) work[i] *= work[i];
1027   }
1028   if (type == NORM_INFINITY) {
1029     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1030   } else {
1031     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(work);CHKERRQ(ierr);
1034   if (type == NORM_2) {
1035     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1036   }
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1041 {
1042   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1043   PetscErrorCode ierr;
1044   PetscScalar    *a;
1045   PetscInt       m,n,i;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1049   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1050   for (i=0; i<m*n; i++) {
1051     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1052   }
1053   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1058 
1059 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1060 {
1061   PetscFunctionBegin;
1062   *missing = PETSC_FALSE;
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 /* -------------------------------------------------------------------*/
1067 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1068                                         MatGetRow_MPIDense,
1069                                         MatRestoreRow_MPIDense,
1070                                         MatMult_MPIDense,
1071                                 /*  4*/ MatMultAdd_MPIDense,
1072                                         MatMultTranspose_MPIDense,
1073                                         MatMultTransposeAdd_MPIDense,
1074                                         0,
1075                                         0,
1076                                         0,
1077                                 /* 10*/ 0,
1078                                         0,
1079                                         0,
1080                                         0,
1081                                         MatTranspose_MPIDense,
1082                                 /* 15*/ MatGetInfo_MPIDense,
1083                                         MatEqual_MPIDense,
1084                                         MatGetDiagonal_MPIDense,
1085                                         MatDiagonalScale_MPIDense,
1086                                         MatNorm_MPIDense,
1087                                 /* 20*/ MatAssemblyBegin_MPIDense,
1088                                         MatAssemblyEnd_MPIDense,
1089                                         MatSetOption_MPIDense,
1090                                         MatZeroEntries_MPIDense,
1091                                 /* 24*/ MatZeroRows_MPIDense,
1092                                         0,
1093                                         0,
1094                                         0,
1095                                         0,
1096                                 /* 29*/ MatSetUp_MPIDense,
1097                                         0,
1098                                         0,
1099                                         MatGetDiagonalBlock_MPIDense,
1100                                         0,
1101                                 /* 34*/ MatDuplicate_MPIDense,
1102                                         0,
1103                                         0,
1104                                         0,
1105                                         0,
1106                                 /* 39*/ MatAXPY_MPIDense,
1107                                         MatCreateSubMatrices_MPIDense,
1108                                         0,
1109                                         MatGetValues_MPIDense,
1110                                         0,
1111                                 /* 44*/ 0,
1112                                         MatScale_MPIDense,
1113                                         MatShift_Basic,
1114                                         0,
1115                                         0,
1116                                 /* 49*/ MatSetRandom_MPIDense,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                         0,
1121                                 /* 54*/ 0,
1122                                         0,
1123                                         0,
1124                                         0,
1125                                         0,
1126                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1127                                         MatDestroy_MPIDense,
1128                                         MatView_MPIDense,
1129                                         0,
1130                                         0,
1131                                 /* 64*/ 0,
1132                                         0,
1133                                         0,
1134                                         0,
1135                                         0,
1136                                 /* 69*/ 0,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                         0,
1141                                 /* 74*/ 0,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                         0,
1146                                 /* 79*/ 0,
1147                                         0,
1148                                         0,
1149                                         0,
1150                                 /* 83*/ MatLoad_MPIDense,
1151                                         0,
1152                                         0,
1153                                         0,
1154                                         0,
1155                                         0,
1156 #if defined(PETSC_HAVE_ELEMENTAL)
1157                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1158                                         MatMatMultSymbolic_MPIDense_MPIDense,
1159 #else
1160                                 /* 89*/ 0,
1161                                         0,
1162 #endif
1163                                         MatMatMultNumeric_MPIDense,
1164                                         0,
1165                                         0,
1166                                 /* 94*/ 0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                         0,
1171                                 /* 99*/ 0,
1172                                         0,
1173                                         0,
1174                                         MatConjugate_MPIDense,
1175                                         0,
1176                                 /*104*/ 0,
1177                                         MatRealPart_MPIDense,
1178                                         MatImaginaryPart_MPIDense,
1179                                         0,
1180                                         0,
1181                                 /*109*/ 0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                         MatMissingDiagonal_MPIDense,
1186                                 /*114*/ 0,
1187                                         0,
1188                                         0,
1189                                         0,
1190                                         0,
1191                                 /*119*/ 0,
1192                                         0,
1193                                         0,
1194                                         0,
1195                                         0,
1196                                 /*124*/ 0,
1197                                         MatGetColumnNorms_MPIDense,
1198                                         0,
1199                                         0,
1200                                         0,
1201                                 /*129*/ 0,
1202                                         MatTransposeMatMult_MPIDense_MPIDense,
1203                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1204                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1205                                         0,
1206                                 /*134*/ 0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                         0,
1211                                 /*139*/ 0,
1212                                         0,
1213                                         0
1214 };
1215 
1216 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1217 {
1218   Mat_MPIDense   *a;
1219   PetscErrorCode ierr;
1220 
1221   PetscFunctionBegin;
1222   mat->preallocated = PETSC_TRUE;
1223   /* Note:  For now, when data is specified above, this assumes the user correctly
1224    allocates the local dense storage space.  We should add error checking. */
1225 
1226   a       = (Mat_MPIDense*)mat->data;
1227   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1228   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1229   a->nvec = mat->cmap->n;
1230 
1231   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1232   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1233   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1234   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1235   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #if defined(PETSC_HAVE_ELEMENTAL)
1240 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1241 {
1242   Mat            mat_elemental;
1243   PetscErrorCode ierr;
1244   PetscScalar    *v;
1245   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1246 
1247   PetscFunctionBegin;
1248   if (reuse == MAT_REUSE_MATRIX) {
1249     mat_elemental = *newmat;
1250     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1251   } else {
1252     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1253     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1254     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1255     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1256     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1257   }
1258 
1259   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1260   for (i=0; i<N; i++) cols[i] = i;
1261   for (i=0; i<m; i++) rows[i] = rstart + i;
1262 
1263   /* The PETSc-Elemental interface uses axpy for setting off-processor entries; only ADD_VALUES is allowed */
1264   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1265   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1266   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1267   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1268   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1269   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1270 
1271   if (reuse == MAT_INPLACE_MATRIX) {
1272     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1273   } else {
1274     *newmat = mat_elemental;
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 #endif
1279 
1280 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1281 {
1282   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1291 {
1292   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1293   PetscErrorCode ierr;
1294 
1295   PetscFunctionBegin;
1296   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
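/*
   Usage sketch (illustrative only; col is a hypothetical global column index): borrow
   write access to one column of the local block.

     PetscScalar *colv;
     ierr = MatDenseGetColumn(A,col,&colv);CHKERRQ(ierr);
     ... colv[i] is the entry in local row i of column col ...
     ierr = MatDenseRestoreColumn(A,&colv);CHKERRQ(ierr);
*/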
1299 
1300 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1301 {
1302   Mat_MPIDense   *a;
1303   PetscErrorCode ierr;
1304 
1305   PetscFunctionBegin;
1306   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1307   mat->data = (void*)a;
1308   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1309 
1310   mat->insertmode = NOT_SET_VALUES;
1311   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1312   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1313 
1314   /* build cache for off array entries formed */
1315   a->donotstash = PETSC_FALSE;
1316 
1317   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1318 
1319   /* stuff used for matrix vector multiply */
1320   a->lvec        = 0;
1321   a->Mvctx       = 0;
1322   a->roworiented = PETSC_TRUE;
1323 
1324   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1325   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1326   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1327   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1328 #if defined(PETSC_HAVE_ELEMENTAL)
1329   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1330 #endif
1331   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1332   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1333   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1334   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1335 
1336   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1337   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1338   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1339   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1340   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1341   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 /*MC
1346    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1347 
1348    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1349    and MATMPIDENSE otherwise.
1350 
1351    Options Database Keys:
1352 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1353 
1354   Level: beginner
1355 
1356 
1357 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1358 M*/
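
/*
   Options-database usage sketch for MATDENSE (illustrative only; assumes a
   communicator comm and conforming local sizes m and n):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);    selects MATDENSE under -mat_type dense
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/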
1359 
1360 /*@C
1361    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1362 
1363    Not Collective
1364 
1365    Input Parameters:
1366 +  B - the matrix
1367 -  data - optional location of matrix data.  Set data=NULL for PETSc
1368    to control all matrix memory allocation.
1369 
1370    Notes:
1371    The dense format is fully compatible with standard Fortran 77
1372    storage by columns.
1373 
1374    The data input variable is intended primarily for Fortran programmers
1375    who wish to allocate their own matrix memory space.  Most users should
1376    set data=NULL.
1377 
1378    Level: intermediate
1379 
1380 .keywords: matrix,dense, parallel
1381 
1382 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1383 @*/
1384 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1385 {
1386   PetscErrorCode ierr;
1387 
1388   PetscFunctionBegin;
1389   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
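
/*
   Usage sketch (illustrative only): letting PETSc allocate the storage,

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);

   Alternatively, pass a caller-owned, column-major array of the local size
   (local rows by global columns) in place of NULL.
*/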
1392 
1393 /*@
1394    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1395    array provided by the user. This is useful to avoid copying an array
1396    into a matrix.
1397 
1398    Not Collective
1399 
1400    Input Parameters:
1401 +  mat - the matrix
1402 -  array - the array in column major order
1403 
1404    Notes:
1405    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing the placed array; it will not be
1406    freed when the matrix is destroyed.
1407 
1408    Level: developer
1409 
1410 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1411 
1412 @*/
1413 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscFunctionBegin;
1417   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1418   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
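
/*
   Usage sketch (illustrative only; myarray is a caller-owned, column-major array
   large enough for the local block):

     ierr = MatDensePlaceArray(mat,myarray);CHKERRQ(ierr);
     ... operations on mat now read and write myarray ...
     ierr = MatDenseResetArray(mat);CHKERRQ(ierr);

   After the reset the matrix again uses its original storage, and the caller
   remains responsible for freeing myarray; see MatDenseResetArray() below.
*/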
1421 
1422 /*@
1423    MatDenseResetArray - Resets the matrix array to the array that it held before the call to MatDensePlaceArray()
1424 
1425    Not Collective
1426 
1427    Input Parameter:
1428 .  mat - the matrix
1429 
1430    Notes:
1431    You can only call this after a call to MatDensePlaceArray()
1432 
1433    Level: developer
1434 
1435 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1436 
1437 @*/
1438 PetscErrorCode  MatDenseResetArray(Mat mat)
1439 {
1440   PetscErrorCode ierr;
1441   PetscFunctionBegin;
1442   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1443   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /*@C
1448    MatCreateDense - Creates a matrix in dense format (MATSEQDENSE on one process, MATMPIDENSE in parallel).
1449 
1450    Collective on MPI_Comm
1451 
1452    Input Parameters:
1453 +  comm - MPI communicator
1454 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1455 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1456 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1457 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1458 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1459    to control all matrix memory allocation.
1460 
1461    Output Parameter:
1462 .  A - the matrix
1463 
1464    Notes:
1465    The dense format is fully compatible with standard Fortran 77
1466    storage by columns.
1467 
1468    The data input variable is intended primarily for Fortran programmers
1469    who wish to allocate their own matrix memory space.  Most users should
1470    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1471 
1472    The user MUST specify either the local or global matrix dimensions
1473    (possibly both).
1474 
1475    Level: intermediate
1476 
1477 .keywords: matrix,dense, parallel
1478 
1479 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1480 @*/
1481 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1482 {
1483   PetscErrorCode ierr;
1484   PetscMPIInt    size;
1485 
1486   PetscFunctionBegin;
1487   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1488   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1489   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1490   if (size > 1) {
1491     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1492     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1493     if (data) {  /* the user provided a data array, so there is no need to assemble */
1494       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1495       (*A)->assembled = PETSC_TRUE;
1496     }
1497   } else {
1498     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1499     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
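
/*
   Usage sketch (illustrative only): creating a distributed dense matrix with
   PETSc choosing the row/column layout and allocating the storage,

     Mat A;
     ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);

   As the code above shows, when a data array is supplied on more than one process
   the matrix is marked assembled immediately, so no MatAssemblyBegin()/End() pair
   is needed before use.
*/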
1503 
1504 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1505 {
1506   Mat            mat;
1507   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1508   PetscErrorCode ierr;
1509 
1510   PetscFunctionBegin;
1511   *newmat = 0;
1512   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1513   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1514   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1515   a       = (Mat_MPIDense*)mat->data;
1516   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1517 
1518   mat->factortype   = A->factortype;
1519   mat->assembled    = PETSC_TRUE;
1520   mat->preallocated = PETSC_TRUE;
1521 
1522   a->size         = oldmat->size;
1523   a->rank         = oldmat->rank;
1524   mat->insertmode = NOT_SET_VALUES;
1525   a->nvec         = oldmat->nvec;
1526   a->donotstash   = oldmat->donotstash;
1527 
1528   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1529   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1530 
1531   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1532   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1533   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1534 
1535   *newmat = mat;
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1540 {
1541   PetscErrorCode ierr;
1542   PetscMPIInt    rank,size;
1543   const PetscInt *rowners;
1544   PetscInt       i,m,n,nz,j,mMax;
1545   PetscScalar    *array,*vals,*vals_ptr;
1546   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1547 
1548   PetscFunctionBegin;
1549   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1550   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1551 
1552   /* determine ownership of rows and columns */
1553   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1554   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1555 
1556   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1557   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1558     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1559   }
1560   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1561   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1562   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1563   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1564   if (!rank) {
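    /* rank 0 sizes its buffer for the largest local row count (mMax) so the same
       buffer can be reused when reading and shipping every other process's block */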
1565     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1566 
1567     /* read in my part of the matrix numerical values  */
1568     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1569 
1570     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1571     vals_ptr = vals;
1572     for (i=0; i<m; i++) {
1573       for (j=0; j<N; j++) {
1574         array[i + j*m] = *vals_ptr++;
1575       }
1576     }
1577 
1578     /* read in the other processes' parts and ship them out */
1579     for (i=1; i<size; i++) {
1580       nz   = (rowners[i+1] - rowners[i])*N;
1581       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1582       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1583     }
1584   } else {
1585     /* receive numeric values */
1586     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1587 
1588     /* receive message of values */
1589     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1590 
1591     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1592     vals_ptr = vals;
1593     for (i=0; i<m; i++) {
1594       for (j=0; j<N; j++) {
1595         array[i + j*m] = *vals_ptr++;
1596       }
1597     }
1598   }
1599   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1600   ierr = PetscFree(vals);CHKERRQ(ierr);
1601   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1602   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1607 {
1608   Mat_MPIDense   *a;
1609   PetscScalar    *vals,*svals;
1610   MPI_Comm       comm;
1611   MPI_Status     status;
1612   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1613   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1614   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1615   PetscInt       i,nz,j,rstart,rend;
1616   int            fd;
1617   PetscErrorCode ierr;
1618 
1619   PetscFunctionBegin;
1620   /* force binary viewer to load .info file if it has not yet done so */
1621   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1622   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1623   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1624   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1625   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
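  /* the PETSc binary header is four PetscInt: MAT_FILE_CLASSID, the global row
     count M, the global column count N, and nz, where nz == MATRIX_BINARY_FORMAT_DENSE
     indicates the values are stored on disk as a dense block */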
1626   if (!rank) {
1627     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1628     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1629   }
1630   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1631   M    = header[1]; N = header[2]; nz = header[3];
1632 
1633   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1634   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1635   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1636 
1637   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1638   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1639 
1640   /*
1641        Handle case where matrix is stored on disk as a dense matrix
1642   */
1643   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1644     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1645     PetscFunctionReturn(0);
1646   }
1647 
1648   /* determine ownership of all rows */
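  /* default split: roughly M/size rows per process, with the first M%size ranks
     each taking one extra row */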
1649   if (newmat->rmap->n < 0) {
1650     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1651   } else {
1652     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1653   }
1654   if (newmat->cmap->n < 0) {
1655     n = PETSC_DECIDE;
1656   } else {
1657     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1658   }
1659 
1660   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1661   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1662   rowners[0] = 0;
1663   for (i=2; i<=size; i++) {
1664     rowners[i] += rowners[i-1];
1665   }
1666   rstart = rowners[rank];
1667   rend   = rowners[rank+1];
1668 
1669   /* distribute row lengths to all processors */
1670   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1671   if (!rank) {
1672     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1673     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1674     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1675     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1676     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1677     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1678   } else {
1679     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1680   }
1681 
1682   if (!rank) {
1683     /* calculate the number of nonzeros on each processor */
1684     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1685     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1686     for (i=0; i<size; i++) {
1687       for (j=rowners[i]; j< rowners[i+1]; j++) {
1688         procsnz[i] += rowlengths[j];
1689       }
1690     }
1691     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1692 
1693     /* determine max buffer needed and allocate it */
1694     maxnz = 0;
1695     for (i=0; i<size; i++) {
1696       maxnz = PetscMax(maxnz,procsnz[i]);
1697     }
1698     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1699 
1700     /* read in my part of the matrix column indices  */
1701     nz   = procsnz[0];
1702     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1703     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1704 
1705     /* read in everyone else's parts and ship them off */
1706     for (i=1; i<size; i++) {
1707       nz   = procsnz[i];
1708       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1709       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1710     }
1711     ierr = PetscFree(cols);CHKERRQ(ierr);
1712   } else {
1713     /* determine buffer space needed for message */
1714     nz = 0;
1715     for (i=0; i<m; i++) {
1716       nz += ourlens[i];
1717     }
1718     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1719 
1720     /* receive message of column indices */
1721     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1722     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1723     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1724   }
1725 
1726   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1727   a = (Mat_MPIDense*)newmat->data;
1728   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1729     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1730   }
1731 
1732   if (!rank) {
1733     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1734 
1735     /* read in my part of the matrix numerical values  */
1736     nz   = procsnz[0];
1737     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1738 
1739     /* insert into matrix */
1740     jj      = rstart;
1741     smycols = mycols;
1742     svals   = vals;
1743     for (i=0; i<m; i++) {
1744       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1745       smycols += ourlens[i];
1746       svals   += ourlens[i];
1747       jj++;
1748     }
1749 
1750     /* read in the other processes' parts and ship them out */
1751     for (i=1; i<size; i++) {
1752       nz   = procsnz[i];
1753       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1754       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1755     }
1756     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1757   } else {
1758     /* receive numeric values */
1759     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1760 
1761     /* receive message of values */
1762     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1763     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1764     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1765 
1766     /* insert into matrix */
1767     jj      = rstart;
1768     smycols = mycols;
1769     svals   = vals;
1770     for (i=0; i<m; i++) {
1771       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1772       smycols += ourlens[i];
1773       svals   += ourlens[i];
1774       jj++;
1775     }
1776   }
1777   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1778   ierr = PetscFree(vals);CHKERRQ(ierr);
1779   ierr = PetscFree(mycols);CHKERRQ(ierr);
1780   ierr = PetscFree(rowners);CHKERRQ(ierr);
1781 
1782   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1783   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1788 {
1789   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1790   Mat            a,b;
1791   PetscBool      flg;
1792   PetscErrorCode ierr;
1793 
1794   PetscFunctionBegin;
1795   a    = matA->A;
1796   b    = matB->A;
1797   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1798   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1803 {
1804   PetscErrorCode        ierr;
1805   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1806   Mat_TransMatMultDense *atb = a->atbdense;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1810   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1811   ierr = PetscFree(atb);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1816 {
1817   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1818   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1819   Mat_TransMatMultDense *atb = c->atbdense;
1820   PetscErrorCode ierr;
1821   MPI_Comm       comm;
1822   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1823   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1824   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1825   PetscScalar    _DOne=1.0,_DZero=0.0;
1826   PetscBLASInt   am,an,bn,aN;
1827   const PetscInt *ranges;
1828 
1829   PetscFunctionBegin;
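  /* Algorithm sketch: each rank forms the full N x cN contribution A_loc^T * B_loc
     with a single BLAS gemm, packs it ordered by the owning rank of the rows of C,
     and MPI_Reduce_scatter() sums the contributions while delivering to each rank
     exactly its own rows of C */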
1830   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1831   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1832   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1833 
1834   /* compute atbarray = aseq^T * bseq */
1835   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1836   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1837   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1838   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1839   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1840 
1841   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1842   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1843 
1844   /* pack atbarray into sendbuf, grouped by the destination rank's rows of C */
1845   k = 0;
1846   for (proc=0; proc<size; proc++) {
1847     for (j=0; j<cN; j++) {
1848       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1849     }
1850   }
1851   /* sum the atbarray contributions from all ranks into the local values of C */
1852   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1853   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1854   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1859 {
1860   PetscErrorCode        ierr;
1861   Mat                   Cdense;
1862   MPI_Comm              comm;
1863   PetscMPIInt           size;
1864   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1865   Mat_MPIDense          *c;
1866   Mat_TransMatMultDense *atb;
1867 
1868   PetscFunctionBegin;
1869   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1870   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1871     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1872   }
1873 
1874   /* create matrix product Cdense */
1875   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1876   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1877   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1878   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1879   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1881   *C   = Cdense;
1882 
1883   /* create data structure so Cdense can be reused */
1884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1885   ierr = PetscNew(&atb);CHKERRQ(ierr);
1886   cM = Cdense->rmap->N;
1887   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1888 
1889   c                    = (Mat_MPIDense*)Cdense->data;
1890   c->atbdense          = atb;
1891   atb->destroy         = Cdense->ops->destroy;
1892   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1897 {
1898   PetscErrorCode ierr;
1899 
1900   PetscFunctionBegin;
1901   if (scall == MAT_INITIAL_MATRIX) {
1902     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1903   }
1904   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1909 {
1910   PetscErrorCode   ierr;
1911   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1912   Mat_MatMultDense *ab = a->abdense;
1913 
1914   PetscFunctionBegin;
1915   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1916   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1917   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1918 
1919   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1920   ierr = PetscFree(ab);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 #if defined(PETSC_HAVE_ELEMENTAL)
1925 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1926 {
1927   PetscErrorCode   ierr;
1928   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1929   Mat_MatMultDense *ab=c->abdense;
1930 
1931   PetscFunctionBegin;
1932   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1933   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1934   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1935   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1940 {
1941   PetscErrorCode   ierr;
1942   Mat              Ae,Be,Ce;
1943   Mat_MPIDense     *c;
1944   Mat_MatMultDense *ab;
1945 
1946   PetscFunctionBegin;
1947   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1948     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D, %D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1949   }
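  /* Approach: round-trip through Elemental, which supplies the scalable parallel
     dense multiply; Ae, Be, and Ce are cached in the Mat_MatMultDense structure so
     a numeric re-multiplication can reuse them without reconverting from scratch */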
1950 
1951   /* convert A and B to Elemental matrices Ae and Be */
1952   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1953   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1954 
1955   /* Ce = Ae*Be */
1956   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1957   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1958 
1959   /* convert Ce to C */
1960   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1961 
1962   /* create data structure so the converted matrices can be reused */
1963   ierr = PetscNew(&ab);CHKERRQ(ierr);
1964   c                  = (Mat_MPIDense*)(*C)->data;
1965   c->abdense         = ab;
1966 
1967   ab->Ae             = Ae;
1968   ab->Be             = Be;
1969   ab->Ce             = Ce;
1970   ab->destroy        = (*C)->ops->destroy;
1971   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1972   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1977 {
1978   PetscErrorCode ierr;
1979 
1980   PetscFunctionBegin;
1981   if (scall == MAT_INITIAL_MATRIX) { /* the symbolic product includes the numeric product */
1982     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1983     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1984     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1985   } else {
1986     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1987     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1988     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1989   }
1990   PetscFunctionReturn(0);
1991 }
1992 #endif
1993 
1994