
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that stores this process's portion of the operator. For a
              MATSEQDENSE matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

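    Example usage (a minimal sketch; A is assumed to be an already created
    MATMPIDENSE matrix, error checking omitted):
.vb
      Mat Aloc;
      MatDenseGetLocalMatrix(A,&Aloc);
      /* Aloc is the per-process MATSEQDENSE block; it is a borrowed
         reference and should not be destroyed by the caller */
.ve
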
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
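    /* Composing B with A caches it for subsequent calls and holds a reference
       to it, so the MatDestroy() below does not actually free B; the caller
       receives a borrowed reference */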
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

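/* Values destined for rows owned by this process go directly into the local
   MATSEQDENSE matrix; values for off-process rows are stashed here and are
   communicated during MatAssemblyBegin()/MatAssemblyEnd(). */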
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! Each index set should
     really be checked to confirm that it is OK; currently only submatrices with
     the same row partitioning as the original matrix are supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A directly and not go through the MatZeroRows()
   routine.
*/
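/* Rows to be zeroed may be specified on any process: the global row indices
   are first routed to their owning processes with point-to-point messages,
   after which each process zeroes its local rows and, when a nonzero diag is
   given, re-inserts the diagonal entries and fixes the right-hand side. */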
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

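/* y = A*x for MATMPIDENSE: each process stores its local rows for ALL columns
   of A, so the distributed input vector x is first gathered into the local
   work vector lvec, and the product is then computed locally. */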
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
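      /* 1-norm is the maximum column sum: each process accumulates |a_ij| over
         its local rows for every global column, the partial sums are added
         across all processes, and the maximum over the columns is taken */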
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INPLACE_MATRIX  && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
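  /* For NORM_2 the local column norms are squared before the sum-reduction and
     square-rooted afterwards; NORM_INFINITY uses a max-reduction, while for
     NORM_1 the per-process column sums simply add up */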
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified, this assumes the user has correctly
   allocated the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-process entries formed during MatSetValues() */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

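   Example usage (a minimal sketch; comm, m, n, M, and N are assumed to be an
   MPI communicator and the desired local and global sizes):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATDENSE);
      MatSetUp(A);
.ve
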
1329   Level: beginner
1330 
1331 
1332 .seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1333 M*/
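
/*
   A minimal sketch of selecting this type from code; the sizes are purely illustrative
   (equivalently, call MatSetFromOptions() and pass -mat_type dense on the command line):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(A);CHKERRQ(ierr);
*/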
1334 
1335 /*@C
1336    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1337 
1338    Not Collective
1339
1340    Input Parameters:
1341 +  B - the matrix
1342 -  data - optional location of matrix data.  Set data=NULL for PETSc
1343    to control all matrix memory allocation.
1344 
1345    Notes:
1346    The dense format is fully compatible with standard Fortran 77
1347    storage by columns.
1348 
1349    The data input variable is intended primarily for Fortran programmers
1350    who wish to allocate their own matrix memory space.  Most users should
1351    set data=NULL.
1352 
1353    Level: intermediate
1354 
1355 .keywords: matrix, dense, parallel
1356 
1357 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1358 @*/
1359 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1360 {
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1365   PetscFunctionReturn(0);
1366 }
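
/*
   A usage sketch with an illustrative local row count m and global column count N;
   passing data=NULL lets PETSc allocate and own the storage:

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/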
1367 
1368 /*@
1369    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1370    array provided by the user. This is useful to avoid copying an array
1371    into a matrix.
1372 
1373    Not Collective
1374 
1375    Input Parameters:
1376 +  mat - the matrix
1377 -  array - the array in column major order
1378 
1379    Notes:
1380    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1381    freed when the matrix is destroyed.
1382 
1383    Level: developer
1384 
1385 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1386 
1387 @*/
1388 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1389 {
1390   PetscErrorCode ierr;
1391   PetscFunctionBegin;
1392   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1393   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 /*@
1398    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1399 
1400    Not Collective
1401 
1402    Input Parameter:
1403 .  mat - the matrix
1404 
1405    Notes:
1406    You can only call this after a call to MatDensePlaceArray()
1407 
1408    Level: developer
1409 
1410 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1411 
1412 @*/
1413 PetscErrorCode  MatDenseResetArray(Mat mat)
1414 {
1415   PetscErrorCode ierr;
1416   PetscFunctionBegin;
1417   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1418   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
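
/*
   A sketch of the place/reset pairing; mywork is a hypothetical caller-owned buffer with
   the correct local dimensions, stored in column-major order. The matrix operates on
   mywork until MatDenseResetArray() restores the original array:

      ierr = MatDensePlaceArray(A,mywork);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);        (reads mywork)
      ierr = MatDenseResetArray(A);CHKERRQ(ierr);
*/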
1421 
1422 /*@C
1423    MatCreateDense - Creates a parallel matrix in dense format.
1424 
1425    Collective on MPI_Comm
1426 
1427    Input Parameters:
1428 +  comm - MPI communicator
1429 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1430 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1431 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1432 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1433 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1434    to control all matrix memory allocation.
1435 
1436    Output Parameter:
1437 .  A - the matrix
1438 
1439    Notes:
1440    The dense format is fully compatible with standard Fortran 77
1441    storage by columns.
1442 
1443    The data input variable is intended primarily for Fortran programmers
1444    who wish to allocate their own matrix memory space.  Most users should
1445    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1446 
1447    The user MUST specify either the local or global matrix dimensions
1448    (possibly both).
1449 
1450    Level: intermediate
1451 
1452 .keywords: matrix, dense, parallel
1453 
1454 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1455 @*/
1456 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1457 {
1458   PetscErrorCode ierr;
1459   PetscMPIInt    size;
1460 
1461   PetscFunctionBegin;
1462   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1463   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1464   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1465   if (size > 1) {
1466     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1467     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1468     if (data) {  /* user provided data array, so no need to assemble */
1469       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1470       (*A)->assembled = PETSC_TRUE;
1471     }
1472   } else {
1473     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1474     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1475   }
1476   PetscFunctionReturn(0);
1477 }
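
/*
   A minimal usage sketch with illustrative global sizes; data=NULL lets PETSc allocate
   the storage, and the matrix must still be assembled before use:

      Mat A;
      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,NULL,&A);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/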
1478 
1479 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1480 {
1481   Mat            mat;
1482   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1483   PetscErrorCode ierr;
1484 
1485   PetscFunctionBegin;
1486   *newmat = 0;
1487   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1488   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1489   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1490   a       = (Mat_MPIDense*)mat->data;
1491   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1492 
1493   mat->factortype   = A->factortype;
1494   mat->assembled    = PETSC_TRUE;
1495   mat->preallocated = PETSC_TRUE;
1496 
1497   a->size         = oldmat->size;
1498   a->rank         = oldmat->rank;
1499   mat->insertmode = NOT_SET_VALUES;
1500   a->nvec         = oldmat->nvec;
1501   a->donotstash   = oldmat->donotstash;
1502 
1503   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1504   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1505 
1506   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1507   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1508   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1509 
1510   *newmat = mat;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1515 {
1516   PetscErrorCode ierr;
1517   PetscMPIInt    rank,size;
1518   const PetscInt *rowners;
1519   PetscInt       i,m,n,nz,j,mMax;
1520   PetscScalar    *array,*vals,*vals_ptr;
1521   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1522 
1523   PetscFunctionBegin;
1524   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1525   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1526 
1527   /* determine ownership of rows and columns */
1528   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1529   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1530 
1531   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1532   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1533     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1534   }
1535   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1536   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1537   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1538   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1539   if (!rank) {
1540     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1541 
1542     /* read in my part of the matrix numerical values  */
1543     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1544 
1545     /* insert into the matrix row by row: the file stores each row contiguously (row-major) while the dense array is column-major, which is why we cannot read directly into array */
1546     vals_ptr = vals;
1547     for (i=0; i<m; i++) {
1548       for (j=0; j<N; j++) {
1549         array[i + j*m] = *vals_ptr++;
1550       }
1551     }
1552 
1553     /* read in other processors and ship out */
1554     for (i=1; i<size; i++) {
1555       nz   = (rowners[i+1] - rowners[i])*N;
1556       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1557       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1558     }
1559   } else {
1560     /* receive numeric values */
1561     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1562 
1563     /* receive message of values */
1564     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1565 
1566     /* insert into the matrix row by row: the received values are row-major while the dense array is column-major, which is why we cannot receive directly into array */
1567     vals_ptr = vals;
1568     for (i=0; i<m; i++) {
1569       for (j=0; j<N; j++) {
1570         array[i + j*m] = *vals_ptr++;
1571       }
1572     }
1573   }
1574   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1575   ierr = PetscFree(vals);CHKERRQ(ierr);
1576   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1577   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1582 {
1583   Mat_MPIDense   *a;
1584   PetscScalar    *vals,*svals;
1585   MPI_Comm       comm;
1586   MPI_Status     status;
1587   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1588   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1589   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1590   PetscInt       i,nz,j,rstart,rend;
1591   int            fd;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   /* force binary viewer to load .info file if it has not yet done so */
1596   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1597   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1600   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1601   if (!rank) {
1602     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1603     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1604   }
1605   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1606   M    = header[1]; N = header[2]; nz = header[3];
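  /* binary sparse format: the 4-integer header is followed by M row lengths, nz column
     indices, and nz scalar values; nz == MATRIX_BINARY_FORMAT_DENSE instead indicates that
     the values follow as a dense, row-major array */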
1607 
1608   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1609   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1610   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1611 
1612   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1613   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1614 
1615   /*
1616        Handle case where matrix is stored on disk as a dense matrix
1617   */
1618   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1619     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1620     PetscFunctionReturn(0);
1621   }
1622 
1623   /* determine ownership of all rows */
1624   if (newmat->rmap->n < 0) {
1625     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1626   } else {
1627     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1628   }
1629   if (newmat->cmap->n < 0) {
1630     n = PETSC_DECIDE;
1631   } else {
1632     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1633   }
1634 
1635   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1636   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1637   rowners[0] = 0;
1638   for (i=2; i<=size; i++) {
1639     rowners[i] += rowners[i-1];
1640   }
1641   rstart = rowners[rank];
1642   rend   = rowners[rank+1];
1643 
1644   /* distribute row lengths to all processors */
1645   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1646   if (!rank) {
1647     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1648     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1649     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1650     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1651     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1652     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1653   } else {
1654     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1655   }
1656 
1657   if (!rank) {
1658     /* calculate the number of nonzeros on each processor */
1659     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1660     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1661     for (i=0; i<size; i++) {
1662       for (j=rowners[i]; j< rowners[i+1]; j++) {
1663         procsnz[i] += rowlengths[j];
1664       }
1665     }
1666     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1667 
1668     /* determine max buffer needed and allocate it */
1669     maxnz = 0;
1670     for (i=0; i<size; i++) {
1671       maxnz = PetscMax(maxnz,procsnz[i]);
1672     }
1673     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1674 
1675     /* read in my part of the matrix column indices  */
1676     nz   = procsnz[0];
1677     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1678     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1679 
1680     /* read in everyone else's parts and ship them out */
1681     for (i=1; i<size; i++) {
1682       nz   = procsnz[i];
1683       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1684       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1685     }
1686     ierr = PetscFree(cols);CHKERRQ(ierr);
1687   } else {
1688     /* determine buffer space needed for message */
1689     nz = 0;
1690     for (i=0; i<m; i++) {
1691       nz += ourlens[i];
1692     }
1693     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1694 
1695     /* receive message of column indices */
1696     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1697     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1698     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1699   }
1700 
1701   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1702   a = (Mat_MPIDense*)newmat->data;
1703   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1704     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1705   }
1706 
1707   if (!rank) {
1708     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1709 
1710     /* read in my part of the matrix numerical values  */
1711     nz   = procsnz[0];
1712     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1713 
1714     /* insert into matrix */
1715     jj      = rstart;
1716     smycols = mycols;
1717     svals   = vals;
1718     for (i=0; i<m; i++) {
1719       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1720       smycols += ourlens[i];
1721       svals   += ourlens[i];
1722       jj++;
1723     }
1724 
1725     /* read in other processors and ship out */
1726     for (i=1; i<size; i++) {
1727       nz   = procsnz[i];
1728       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1729       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1730     }
1731     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1732   } else {
1733     /* receive numeric values */
1734     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1735 
1736     /* receive message of values */
1737     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1738     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1739     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1740 
1741     /* insert into matrix */
1742     jj      = rstart;
1743     smycols = mycols;
1744     svals   = vals;
1745     for (i=0; i<m; i++) {
1746       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1747       smycols += ourlens[i];
1748       svals   += ourlens[i];
1749       jj++;
1750     }
1751   }
1752   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1753   ierr = PetscFree(vals);CHKERRQ(ierr);
1754   ierr = PetscFree(mycols);CHKERRQ(ierr);
1755   ierr = PetscFree(rowners);CHKERRQ(ierr);
1756 
1757   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1758   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1763 {
1764   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1765   Mat            a,b;
1766   PetscBool      flg;
1767   PetscErrorCode ierr;
1768 
1769   PetscFunctionBegin;
1770   a    = matA->A;
1771   b    = matB->A;
1772   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1773   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1778 {
1779   PetscErrorCode        ierr;
1780   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1781   Mat_TransMatMultDense *atb = a->atbdense;
1782 
1783   PetscFunctionBegin;
1784   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1785   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1786   ierr = PetscFree(atb);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1791 {
1792   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1793   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1794   Mat_TransMatMultDense *atb = c->atbdense;
1795   PetscErrorCode ierr;
1796   MPI_Comm       comm;
1797   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1798   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1799   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1800   PetscScalar    _DOne=1.0,_DZero=0.0;
1801   PetscBLASInt   am,an,bn,aN;
1802   const PetscInt *ranges;
1803 
1804   PetscFunctionBegin;
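  /* Algorithm: each process computes the full-sized product contribution
     atbarray = A_loc^T * B_loc (all cM x cN entries of C) from its local rows, packs it
     by destination process into sendbuf, and MPI_Reduce_scatter then sums the
     contributions and leaves each process with the row block of C that it owns */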
1805   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1806   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1807   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1808 
1809   /* compute atbarray = aseq^T * bseq */
1810   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1811   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1812   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1813   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1814   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1815 
1816   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1817   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1818 
1819   /* pack atbarray into sendbuf, grouping the rows owned by each destination process (column-major within each block) */
1820   k = 0;
1821   for (proc=0; proc<size; proc++) {
1822     for (j=0; j<cN; j++) {
1823       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1824     }
1825   }
1826   /* sum the atbarray contributions from all processes into the local values of C */
1827   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1828   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1829   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1834 {
1835   PetscErrorCode        ierr;
1836   Mat                   Cdense;
1837   MPI_Comm              comm;
1838   PetscMPIInt           size;
1839   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1840   Mat_MPIDense          *c;
1841   Mat_TransMatMultDense *atb;
1842 
1843   PetscFunctionBegin;
1844   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1845   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1846     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1847   }
1848 
1849   /* create matrix product Cdense */
1850   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1851   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1852   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1853   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1854   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1855   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1856   *C   = Cdense;
1857 
1858   /* create the data structure that allows Cdense to be reused */
1859   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1860   ierr = PetscNew(&atb);CHKERRQ(ierr);
1861   cM = Cdense->rmap->N;
1862   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1863 
1864   c                    = (Mat_MPIDense*)Cdense->data;
1865   c->atbdense          = atb;
1866   atb->destroy         = Cdense->ops->destroy;
1867   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1872 {
1873   PetscErrorCode ierr;
1874 
1875   PetscFunctionBegin;
1876   if (scall == MAT_INITIAL_MATRIX) {
1877     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1878   }
1879   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1884 {
1885   PetscErrorCode   ierr;
1886   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1887   Mat_MatMultDense *ab = a->abdense;
1888 
1889   PetscFunctionBegin;
1890   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1891   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1892   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1893 
1894   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1895   ierr = PetscFree(ab);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #if defined(PETSC_HAVE_ELEMENTAL)
1900 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1901 {
1902   PetscErrorCode   ierr;
1903   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1904   Mat_MatMultDense *ab=c->abdense;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1908   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1909   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1910   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1915 {
1916   PetscErrorCode   ierr;
1917   Mat              Ae,Be,Ce;
1918   Mat_MPIDense     *c;
1919   Mat_MatMultDense *ab;
1920 
1921   PetscFunctionBegin;
1922   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1923     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1924   }
1925 
1926   /* convert A and B to Elemental matrices Ae and Be */
1927   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1928   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1929 
1930   /* Ce = Ae*Be */
1931   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1932   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1933 
1934   /* convert Ce to C */
1935   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1936 
1937   /* create the data structure that allows C to be reused */
1938   ierr = PetscNew(&ab);CHKERRQ(ierr);
1939   c                  = (Mat_MPIDense*)(*C)->data;
1940   c->abdense         = ab;
1941 
1942   ab->Ae             = Ae;
1943   ab->Be             = Be;
1944   ab->Ce             = Ce;
1945   ab->destroy        = (*C)->ops->destroy;
1946   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1947   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1952 {
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes the numeric product */
1957     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1958     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1959     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
1960   } else {
1961     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1962     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1963     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 #endif
1968