xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4863603af06b16768e4e4a060cedf7d42fa55e81)
1 
/*
   Basic support routines for parallel dense matrices.
*/
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
/* Returns the on-process diagonal block of a square MPIDENSE matrix as a
   sequential dense matrix that shares storage with the local block (no copy).
   The block is cached on A under the key "DiagonalBlock" so repeated calls
   return the same matrix. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* point B at the diagonal sub-block of the local dense array (column offset rstart);
       assumes column-major storage with stride m -- TODO confirm against lda of mdn->A */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* compose takes its own reference, so destroying our reference below leaves
       B alive, owned by A; it is freed when A is destroyed */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
90 
/* Inserts or adds a logically dense sub-block of values. Rows owned by this
   process go straight into the local sequential block; off-process rows are
   stashed (unless donotstash) and communicated during assembly. The layout of
   v depends on roworiented: row-major when true, column-major otherwise. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row i of v is contiguous: hand the whole row to the local block */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-major v: entry (i,j) lives at v[i + j*m], set one at a time */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it for the assembly-time scatter */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
188 {
189   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
/* Extracts the submatrix A(isrow,iscol) into a new (or reused) MPIDENSE matrix.
   The column IS is gathered onto every process, so each process copies its own
   rows for all selected global columns. Row indices in isrow must be local to
   the calling process (no redistribution is performed -- see note below). */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  /* source columns are strided by lda of the local block; the destination is
     written contiguously (nrows entries per selected column) */
  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
263 
264 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
275 {
276   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
/* Starts assembly: verifies all ranks agree on ADD_VALUES vs INSERT_VALUES and
   begins scattering stashed off-process entries to their owners. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* the bitwise OR across ranks yields ADD_VALUES|INSERT_VALUES iff modes are mixed */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* launch the communication of stashed values; completed in MatAssemblyEnd */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
304 
/* Completes assembly: receives stashed entries from other ranks, inserts them
   locally, assembles the sequential block, and builds the scatter context for
   MatMult on the first final assembly. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    /* flg is false once all incoming stash messages have been consumed */
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  /* assemble the local sequential block */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up lvec/Mvctx used by the mult routines */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
341 
342 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
343 {
344   PetscErrorCode ierr;
345   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
346 
347   PetscFunctionBegin;
348   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
/* Zeros the (possibly off-process) global rows listed in rows[], optionally
   placing diag on the diagonal and fixing b so that b = diag*x in those rows.
   Row indices may name rows owned by any rank: each rank sends the indices it
   does not own to their owners with a hand-rolled Isend/Irecv exchange, then
   zeros its own rows. Requires square matrices with matching row/column
   ownership (see the BUG note above). */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  /* sizes[2*j] = #indices destined for rank j; sizes[2*j+1] = 1 iff any are */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* linear scan of the ownership ranges to find the owner of row idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  /* pack indices grouped by destination rank (starts[] is consumed here...) */
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* ...and rebuilt here for issuing the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* collect all received global indices, converted to local row numbers */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    /* write diag directly into the local dense storage; the diagonal entry of
       local row r sits in global column cstart+r, i.e. offset r + lda*(cstart+r) */
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
492 
493 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
494 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
495 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
496 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
497 
498 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
501   PetscErrorCode ierr;
502 
503   PetscFunctionBegin;
504   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
517   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
523 {
524   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
525   PetscErrorCode ierr;
526   PetscScalar    zero = 0.0;
527 
528   PetscFunctionBegin;
529   ierr = VecSet(yy,zero);CHKERRQ(ierr);
530   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
531   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
532   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
537 {
538   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
539   PetscErrorCode ierr;
540 
541   PetscFunctionBegin;
542   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
543   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
544   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
545   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
550 {
551   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
552   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
553   PetscErrorCode ierr;
554   PetscInt       len,i,n,m = A->rmap->n,radd;
555   PetscScalar    *x,zero = 0.0;
556 
557   PetscFunctionBegin;
558   ierr = VecSet(v,zero);CHKERRQ(ierr);
559   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
560   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
561   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
562   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
563   radd = A->rmap->rstart*m;
564   for (i=0; i<len; i++) {
565     x[i] = aloc->v[radd + i*m + i];
566   }
567   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
/* Destroys an MPIDENSE matrix: frees the local block, work vector, scatter
   context and stash, then clears the type name and every composed method so a
   subsequent MatSetType() starts from a clean object. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove all the composed functions registered by MatCreate_MPIDense and friends */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
609 
/* Writes the matrix to a binary viewer. Requires PETSC_VIEWER_NATIVE format.
   Rank 0 writes the header plus its own rows, then receives each other rank's
   local block in turn (rank order) and writes it; the column-major local
   storage is transposed into row-major order before each write. */
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* uniprocessor case: defer entirely to the sequential viewer */
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        /* NOTE(review): this walk assumes the local block is contiguous,
           i.e. lda == mat->rmap->n -- confirm for placed arrays */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;  /* transpose column-major v into row-major work */
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          /* receive rank k's block, transpose it, and append to the file */
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks just ship their local block to rank 0 */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
683 
684 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
685 #include <petscdraw.h>
/* Views the matrix on ASCII, draw, or socket viewers. INFO formats print
   per-process statistics and return early; otherwise the whole matrix is
   gathered onto rank 0 (as a temporary MPIDENSE with all rows on rank 0)
   and viewed there with the sequential viewer. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-rank local sizes/memory, then the scatter context, and stop */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all M rows of the temporary; every other rank owns none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* view the gathered sequential block under the original matrix's name */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
767 
768 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
769 {
770   PetscErrorCode ierr;
771   PetscBool      iascii,isbinary,isdraw,issocket;
772 
773   PetscFunctionBegin;
774   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
775   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
776   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
777   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
778 
779   if (iascii || issocket || isdraw) {
780     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
781   } else if (isbinary) {
782     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
788 {
789   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
790   Mat            mdn  = mat->A;
791   PetscErrorCode ierr;
792   PetscReal      isend[5],irecv[5];
793 
794   PetscFunctionBegin;
795   info->block_size = 1.0;
796 
797   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
798 
799   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800   isend[3] = info->memory;  isend[4] = info->mallocs;
801   if (flag == MAT_LOCAL) {
802     info->nz_used      = isend[0];
803     info->nz_allocated = isend[1];
804     info->nz_unneeded  = isend[2];
805     info->memory       = isend[3];
806     info->mallocs      = isend[4];
807   } else if (flag == MAT_GLOBAL_MAX) {
808     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
809 
810     info->nz_used      = irecv[0];
811     info->nz_allocated = irecv[1];
812     info->nz_unneeded  = irecv[2];
813     info->memory       = irecv[3];
814     info->mallocs      = irecv[4];
815   } else if (flag == MAT_GLOBAL_SUM) {
816     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
817 
818     info->nz_used      = irecv[0];
819     info->nz_allocated = irecv[1];
820     info->nz_unneeded  = irecv[2];
821     info->memory       = irecv[3];
822     info->mallocs      = irecv[4];
823   }
824   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
825   info->fill_ratio_needed = 0;
826   info->factor_mallocs    = 0;
827   PetscFunctionReturn(0);
828 }
829 
830 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
831 {
832   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
833   PetscErrorCode ierr;
834 
835   PetscFunctionBegin;
836   switch (op) {
837   case MAT_NEW_NONZERO_LOCATIONS:
838   case MAT_NEW_NONZERO_LOCATION_ERR:
839   case MAT_NEW_NONZERO_ALLOCATION_ERR:
840     MatCheckPreallocated(A,1);
841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
842     break;
843   case MAT_ROW_ORIENTED:
844     MatCheckPreallocated(A,1);
845     a->roworiented = flg;
846     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
847     break;
848   case MAT_NEW_DIAGONALS:
849   case MAT_KEEP_NONZERO_PATTERN:
850   case MAT_USE_HASH_TABLE:
851     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
852     break;
853   case MAT_IGNORE_OFF_PROC_ENTRIES:
854     a->donotstash = flg;
855     break;
856   case MAT_SYMMETRIC:
857   case MAT_STRUCTURALLY_SYMMETRIC:
858   case MAT_HERMITIAN:
859   case MAT_SYMMETRY_ETERNAL:
860   case MAT_IGNORE_LOWER_TRIANGULAR:
861   case MAT_IGNORE_ZERO_ENTRIES:
862     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
863     break;
864   default:
865     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 
871 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
872 {
873   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
874   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
875   const PetscScalar *l,*r;
876   PetscScalar       x,*v;
877   PetscErrorCode    ierr;
878   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
879 
880   PetscFunctionBegin;
881   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
882   if (ll) {
883     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
884     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
885     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
886     for (i=0; i<m; i++) {
887       x = l[i];
888       v = mat->v + i;
889       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
890     }
891     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
892     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
893   }
894   if (rr) {
895     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
896     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
897     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
898     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
899     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
900     for (i=0; i<n; i++) {
901       x = r[i];
902       v = mat->v + i*m;
903       for (j=0; j<m; j++) (*v++) *= x;
904     }
905     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
906     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
912 {
913   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
914   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
915   PetscErrorCode ierr;
916   PetscInt       i,j;
917   PetscReal      sum = 0.0;
918   PetscScalar    *v  = mat->v;
919 
920   PetscFunctionBegin;
921   if (mdn->size == 1) {
922     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
923   } else {
924     if (type == NORM_FROBENIUS) {
925       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
926         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
927       }
928       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929       *nrm = PetscSqrtReal(*nrm);
930       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
931     } else if (type == NORM_1) {
932       PetscReal *tmp,*tmp2;
933       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
934       *nrm = 0.0;
935       v    = mat->v;
936       for (j=0; j<mdn->A->cmap->n; j++) {
937         for (i=0; i<mdn->A->rmap->n; i++) {
938           tmp[j] += PetscAbsScalar(*v);  v++;
939         }
940       }
941       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
942       for (j=0; j<A->cmap->N; j++) {
943         if (tmp2[j] > *nrm) *nrm = tmp2[j];
944       }
945       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
946       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
947     } else if (type == NORM_INFINITY) { /* max row norm */
948       PetscReal ntemp;
949       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
950       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
951     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
952   }
953   PetscFunctionReturn(0);
954 }
955 
/*
   Forms the transpose of the parallel dense matrix A.

   reuse == MAT_INITIAL_MATRIX : a new matrix is created and returned in *matout
   reuse == MAT_REUSE_MATRIX   : *matout (already of transposed sizes) is filled
   reuse == MAT_INPLACE_MATRIX : a temporary transpose is built and then merged
                                 into A's header (A becomes its own transpose)

   NOTE(review): the local values are walked with stride m (= local rows),
   i.e. this assumes the sequential block's lda equals m — TODO confirm for
   user-supplied storage.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* even for MAT_INPLACE_MATRIX a fresh matrix is built first, then merged below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* column j of the local block becomes global row j of B; the column's
     entries land in B's global columns rstart..rstart+m-1 */
  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's innards with B's, destroying B's header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
993 
994 
995 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
996 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
997 
998 PetscErrorCode MatSetUp_MPIDense(Mat A)
999 {
1000   PetscErrorCode ierr;
1001 
1002   PetscFunctionBegin;
1003   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1008 {
1009   PetscErrorCode ierr;
1010   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1011 
1012   PetscFunctionBegin;
1013   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1014   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 PetscErrorCode MatRealPart_MPIDense(Mat A)
1029 {
1030   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1039 {
1040   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1041   PetscErrorCode ierr;
1042 
1043   PetscFunctionBegin;
1044   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1049 {
1050   PetscErrorCode    ierr;
1051   PetscScalar       *x;
1052   const PetscScalar *a;
1053   PetscInt          lda;
1054 
1055   PetscFunctionBegin;
1056   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1057   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1058   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
1059   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1060   ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
1061   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1062   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1067 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1068 {
1069   PetscErrorCode ierr;
1070   PetscInt       i,n;
1071   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1072   PetscReal      *work;
1073 
1074   PetscFunctionBegin;
1075   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1076   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1077   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1078   if (type == NORM_2) {
1079     for (i=0; i<n; i++) work[i] *= work[i];
1080   }
1081   if (type == NORM_INFINITY) {
1082     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1083   } else {
1084     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1085   }
1086   ierr = PetscFree(work);CHKERRQ(ierr);
1087   if (type == NORM_2) {
1088     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1089   }
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1094 {
1095   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1096   PetscErrorCode ierr;
1097   PetscScalar    *a;
1098   PetscInt       m,n,i;
1099 
1100   PetscFunctionBegin;
1101   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1102   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1103   for (i=0; i<m*n; i++) {
1104     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1105   }
1106   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1111 
/*
   A dense matrix stores every entry, so a diagonal entry is never missing.
   Note: *d is never written here — presumably callers only consult d when
   *missing is PETSC_TRUE; TODO confirm against the MatMissingDiagonal()
   contract.  The matrix argument A is intentionally unused.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1118 
1119 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1120 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1121 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1122 
1123 /* -------------------------------------------------------------------*/
1124 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1125                                         MatGetRow_MPIDense,
1126                                         MatRestoreRow_MPIDense,
1127                                         MatMult_MPIDense,
1128                                 /*  4*/ MatMultAdd_MPIDense,
1129                                         MatMultTranspose_MPIDense,
1130                                         MatMultTransposeAdd_MPIDense,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                 /* 10*/ 0,
1135                                         0,
1136                                         0,
1137                                         0,
1138                                         MatTranspose_MPIDense,
1139                                 /* 15*/ MatGetInfo_MPIDense,
1140                                         MatEqual_MPIDense,
1141                                         MatGetDiagonal_MPIDense,
1142                                         MatDiagonalScale_MPIDense,
1143                                         MatNorm_MPIDense,
1144                                 /* 20*/ MatAssemblyBegin_MPIDense,
1145                                         MatAssemblyEnd_MPIDense,
1146                                         MatSetOption_MPIDense,
1147                                         MatZeroEntries_MPIDense,
1148                                 /* 24*/ MatZeroRows_MPIDense,
1149                                         0,
1150                                         0,
1151                                         0,
1152                                         0,
1153                                 /* 29*/ MatSetUp_MPIDense,
1154                                         0,
1155                                         0,
1156                                         MatGetDiagonalBlock_MPIDense,
1157                                         0,
1158                                 /* 34*/ MatDuplicate_MPIDense,
1159                                         0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                 /* 39*/ MatAXPY_MPIDense,
1164                                         MatCreateSubMatrices_MPIDense,
1165                                         0,
1166                                         MatGetValues_MPIDense,
1167                                         0,
1168                                 /* 44*/ 0,
1169                                         MatScale_MPIDense,
1170                                         MatShift_Basic,
1171                                         0,
1172                                         0,
1173                                 /* 49*/ MatSetRandom_MPIDense,
1174                                         0,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                 /* 54*/ 0,
1179                                         0,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1184                                         MatDestroy_MPIDense,
1185                                         MatView_MPIDense,
1186                                         0,
1187                                         0,
1188                                 /* 64*/ 0,
1189                                         0,
1190                                         0,
1191                                         0,
1192                                         0,
1193                                 /* 69*/ 0,
1194                                         0,
1195                                         0,
1196                                         0,
1197                                         0,
1198                                 /* 74*/ 0,
1199                                         0,
1200                                         0,
1201                                         0,
1202                                         0,
1203                                 /* 79*/ 0,
1204                                         0,
1205                                         0,
1206                                         0,
1207                                 /* 83*/ MatLoad_MPIDense,
1208                                         0,
1209                                         0,
1210                                         0,
1211                                         0,
1212                                         0,
1213 #if defined(PETSC_HAVE_ELEMENTAL)
1214                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1215                                         MatMatMultSymbolic_MPIDense_MPIDense,
1216 #else
1217                                 /* 89*/ 0,
1218                                         0,
1219 #endif
1220                                         MatMatMultNumeric_MPIDense,
1221                                         0,
1222                                         0,
1223                                 /* 94*/ 0,
1224                                         MatMatTransposeMult_MPIDense_MPIDense,
1225                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1226                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1227                                         0,
1228                                 /* 99*/ 0,
1229                                         0,
1230                                         0,
1231                                         MatConjugate_MPIDense,
1232                                         0,
1233                                 /*104*/ 0,
1234                                         MatRealPart_MPIDense,
1235                                         MatImaginaryPart_MPIDense,
1236                                         0,
1237                                         0,
1238                                 /*109*/ 0,
1239                                         0,
1240                                         0,
1241                                         MatGetColumnVector_MPIDense,
1242                                         MatMissingDiagonal_MPIDense,
1243                                 /*114*/ 0,
1244                                         0,
1245                                         0,
1246                                         0,
1247                                         0,
1248                                 /*119*/ 0,
1249                                         0,
1250                                         0,
1251                                         0,
1252                                         0,
1253                                 /*124*/ 0,
1254                                         MatGetColumnNorms_MPIDense,
1255                                         0,
1256                                         0,
1257                                         0,
1258                                 /*129*/ 0,
1259                                         MatTransposeMatMult_MPIDense_MPIDense,
1260                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1261                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1262                                         0,
1263                                 /*134*/ 0,
1264                                         0,
1265                                         0,
1266                                         0,
1267                                         0,
1268                                 /*139*/ 0,
1269                                         0,
1270                                         0,
1271                                         0,
1272                                         0,
1273                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1274 };
1275 
1276 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1277 {
1278   Mat_MPIDense   *a;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1283   mat->preallocated = PETSC_TRUE;
1284   /* Note:  For now, when data is specified above, this assumes the user correctly
1285    allocates the local dense storage space.  We should add error checking. */
1286 
1287   a       = (Mat_MPIDense*)mat->data;
1288   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1289   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1290   a->nvec = mat->cmap->n;
1291 
1292   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1293   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1294   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1295   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1296   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   Converts a MATMPIDENSE matrix into a MATELEMENTAL matrix by inserting each
   process's locally owned rows.  Supports MAT_INITIAL_MATRIX,
   MAT_REUSE_MATRIX (zeroes and refills *newmat), and MAT_INPLACE_MATRIX
   (replaces A's header).
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            elem;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       i,m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    elem = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &elem);CHKERRQ(ierr);
    ierr = MatSetSizes(elem,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(elem,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(elem);CHKERRQ(ierr);
    ierr = MatSetOption(elem,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* global indices of this process's rows, and of all columns */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<m; i++) rows[i] = rstart + i;
  for (i=0; i<N; i++) cols[i] = i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(elem,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(elem, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(elem, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&elem);CHKERRQ(ierr);
  } else {
    *newmat = elem;
  }
  PetscFunctionReturn(0);
}
#endif
1340 
1341 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1342 {
1343   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1352 {
1353   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1362 {
1363   PetscErrorCode ierr;
1364   Mat_MPIDense   *mat;
1365   PetscInt       m,nloc,N;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1369   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1370   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1371     PetscInt sum;
1372 
1373     if (n == PETSC_DECIDE) {
1374       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1375     }
1376     /* Check sum(n) = N */
1377     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1378     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1379 
1380     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1381   }
1382 
1383   /* numeric phase */
1384   mat = (Mat_MPIDense*)(*outmat)->data;
1385   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1386   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1387   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
/*
   MATMPIDENSE constructor: allocates the type-specific data, installs the
   operations table, creates the stash used to buffer off-process
   MatSetValues() entries, and composes the type-specific methods backing the
   MatDenseXXX()/MatMPIDenseXXX() public interfaces.  Storage itself is
   allocated later by MatMPIDenseSetPreallocation()/MatSetUp().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply; created lazily, not here */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* compose the type-specific implementations dispatched by the public API */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1438 
1439 /*MC
1440    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1441 
1442    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1443    and MATMPIDENSE otherwise.
1444 
1445    Options Database Keys:
1446 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1447 
1448   Level: beginner
1449 
1450 
.seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1452 M*/
1453 
1454 /*@C
1455    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1456 
1457    Not collective
1458 
1459    Input Parameters:
+  B - the matrix
1461 -  data - optional location of matrix data.  Set data=NULL for PETSc
1462    to control all matrix memory allocation.
1463 
1464    Notes:
1465    The dense format is fully compatible with standard Fortran 77
1466    storage by columns.
1467 
1468    The data input variable is intended primarily for Fortran programmers
1469    who wish to allocate their own matrix memory space.  Most users should
1470    set data=NULL.
1471 
1472    Level: intermediate
1473 
1474 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1475 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation; PetscTryMethod() is a
     silent no-op when the matrix type has no such method composed */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1484 
1485 /*@
1486    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1487    array provided by the user. This is useful to avoid copying an array
1488    into a matrix
1489 
1490    Not Collective
1491 
1492    Input Parameters:
1493 +  mat - the matrix
1494 -  array - the array in column major order
1495 
1496    Notes:
1497    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1498    freed when the matrix is destroyed.
1499 
1500    Level: developer
1501 
1502 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1503 
1504 @*/
1505 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1506 {
1507   PetscErrorCode ierr;
1508   PetscFunctionBegin;
1509   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1510   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 /*@
1515    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1516 
1517    Not Collective
1518 
1519    Input Parameters:
1520 .  mat - the matrix
1521 
1522    Notes:
1523    You can only call this after a call to MatDensePlaceArray()
1524 
1525    Level: developer
1526 
1527 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1528 
1529 @*/
1530 PetscErrorCode  MatDenseResetArray(Mat mat)
1531 {
1532   PetscErrorCode ierr;
1533   PetscFunctionBegin;
1534   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1535   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 /*@C
1540    MatCreateDense - Creates a parallel matrix in dense format.
1541 
1542    Collective
1543 
1544    Input Parameters:
1545 +  comm - MPI communicator
1546 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1547 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1548 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1549 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1550 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1551    to control all matrix memory allocation.
1552 
1553    Output Parameter:
1554 .  A - the matrix
1555 
1556    Notes:
1557    The dense format is fully compatible with standard Fortran 77
1558    storage by columns.
1559 
1560    The data input variable is intended primarily for Fortran programmers
1561    who wish to allocate their own matrix memory space.  Most users should
1562    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1563 
1564    The user MUST specify either the local or global matrix dimensions
1565    (possibly both).
1566 
1567    Level: intermediate
1568 
1569 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1570 @*/
1571 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1572 {
1573   PetscErrorCode ierr;
1574   PetscMPIInt    size;
1575 
1576   PetscFunctionBegin;
1577   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1578   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1579   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1580   if (size > 1) {
1581     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1582     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1583     if (data) {  /* user provided data array, so no need to assemble */
1584       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1585       (*A)->assembled = PETSC_TRUE;
1586     }
1587   } else {
1588     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1589     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1590   }
1591   PetscFunctionReturn(0);
1592 }
1593 
/* Duplicates an MPIDENSE matrix: creates a new matrix of the same type and
   layout, copies the parallel bookkeeping fields, and duplicates the local
   sequential block (copying values or not according to cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  /* the duplicate is immediately usable: no assembly needed */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  /* copy parallel bookkeeping from the original */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts by reference rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* set up the mat-vec scatter, then duplicate the local dense block */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
1627 
/* Loads a matrix that is stored on disk in dense (row-major) format.
   Process 0 reads the file and ships each other process its block of rows;
   values are transposed into the column-major local array as they arrive. */
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  if (!a->A) {
    /* matrix was not preallocated yet; let PETSc allocate the storage */
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* process 0 sizes its staging buffer by the largest local row count */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1694 
/* Loads an MPIDENSE matrix from a PETSc binary viewer.  Process 0 reads the
   header, row lengths, column indices, and values, and ships each process its
   share; dense-on-disk files are delegated to MatLoad_MPIDense_DenseInFile. */
static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    /* header layout: classid, global rows M, global cols N, nonzero count nz */
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    /* split rows as evenly as possible; the first M % size ranks get one extra */
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  /* build the row-ownership prefix sum: rowners[i] = first global row of rank i */
  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* non-root ranks: send buffer arguments of MPI_Scatterv are ignored */
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    /* +1 so the allocation is nonzero even when this rank owns no entries */
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  if (!a->A) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1872 
/* Dispatches MatLoad for MPIDENSE matrices to the viewer-specific reader
   (binary always; HDF5 only when PETSc was configured with HDF5). */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
1899 
1900 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1901 {
1902   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1903   Mat            a,b;
1904   PetscBool      flg;
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   a    = matA->A;
1909   b    = matB->A;
1910   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1911   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
/* Destroy routine installed on C = A^T*B products: frees the reuse buffers,
   then chains to the destroy routine that was saved before being overridden. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  /* buffers were allocated together in the symbolic phase with PetscMalloc3 */
  ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
  ierr = (atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1927 
/* Destroy routine installed on C = A*B^T products: frees the communication
   buffers, then chains to the original (saved) destroy routine. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  /* recvcounts/recvdispls are only allocated for the allgatherv algorithm;
     PetscFree2 of NULL pointers is a no-op */
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1941 
/* Numeric phase of C = A^T*B: each process forms its full N x cN contribution
   with a local GEMM, then the contributions are summed and distributed with
   MPI_Reduce_scatter so each process ends up with its rows of C. */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
  PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
  PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  PetscScalar    _DOne=1.0,_DZero=0.0;
  PetscBLASInt   am,an,bn,aN;
  const PetscInt *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));

  /* each process receives the entries for its own rows of C */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  /* pack column-major atbarray so each destination's block is contiguous */
  k = 0;
  for (proc=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1984 
/* Symbolic phase of C = A^T*B: creates the product matrix, allocates the
   buffers reused by the numeric phase, and hooks a custom destroy routine
   onto C so those buffers are released with it. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to have identical row distributions */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM = Cdense->rmap->N;
  /* sendbuf/atbarray hold the full global-size product contribution */
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  /* save the original destroy routine and install our own (it chains back) */
  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2022 
/* Driver for C = A^T*B: runs the symbolic phase only on first use
   (MAT_INITIAL_MATRIX), then always runs the numeric phase. */
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2034 
/* Symbolic phase of C = A*B^T: creates the product matrix, selects the
   communication algorithm (allgatherv or cyclic, via options database), sizes
   the reuse buffers accordingly, and installs a chaining destroy routine. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg, nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  /* A*B^T requires A and B to have the same global column dimension */
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic algorithm's messages separate */
  ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* cyclic: two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* allgatherv: send buffer for local B^T, receive buffer for all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  /* save the original destroy routine and install our own (it chains back) */
  c                    = (Mat_MPIDense*)Cdense->data;
  c->abtdense          = abt;
  abt->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2093 
/* Numeric phase of C = A*B^T using a ring (cyclic) pipeline: each process
   passes its block of B around the ring, multiplying the local A against each
   block as it arrives and writing the result into the matching columns of C.
   Communication is overlapped with the local GEMM via nonblocking send/recv. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar    *sendbuf, *recvbuf=0, *carray;
  PetscInt       i,cK=A->cmap->N,k,j,bn;
  PetscScalar    _DOne=1.0,_DZero=0.0;
  PetscBLASInt   cm, cn, ck;
  MPI_Request    reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn = B->rmap->n;
  if (bseq->lda == bn) {
    /* local B is contiguous: send it in place without copying */
    sendbuf = bseq->v;
  } else {
    /* compact local B (lda > bn) into the staging buffer */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  /* ring neighbors: send "down", receive "up" */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscFunctionReturn(0);
}
2165 
/* Numeric phase of C = A*B^T using MPI_Allgatherv: every process gathers all
   of B^T and then computes its rows of C with a single local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    rank,size;
  PetscScalar    *sendbuf, *recvbuf;
  PetscInt       i,cK=A->cmap->N,k,j,bn;
  PetscScalar    _DOne=1.0,_DZero=0.0;
  PetscBLASInt   cm, cn, ck;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bseq->v[i * bseq->lda + j];
    }
  }
  /* gather every process's transposed block; recvbuf then holds all of B^T */
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  /* local rows of C = (local A) * gathered B^T */
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
  PetscFunctionReturn(0);
}
2201 
2202 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2203 {
2204   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2205   Mat_MatTransMultDense *abt = c->abtdense;
2206   PetscErrorCode ierr;
2207 
2208   PetscFunctionBegin;
2209   switch (abt->alg) {
2210   case 1:
2211     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2212     break;
2213   default:
2214     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2215     break;
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 
/* Driver for C = A*B^T: runs the symbolic phase only on first use
   (MAT_INITIAL_MATRIX), then always runs the numeric phase. */
static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2231 
/* Destroy routine installed on C = A*B products: destroys the Elemental
   copies kept for reuse, then chains to the original (saved) destroy routine. */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
  Mat_MatMultDense *ab = a->abdense;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);

  ierr = (ab->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2247 
2248 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: refresh the cached Elemental copies
   of A and B, multiply there, and copy the result back into C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  /* reuse the Elemental matrices created in the symbolic phase */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  /* copy the Elemental product back into the existing MPIDENSE matrix C */
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2262 
2263 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2264 {
2265   PetscErrorCode   ierr;
2266   Mat              Ae,Be,Ce;
2267   Mat_MPIDense     *c;
2268   Mat_MatMultDense *ab;
2269 
2270   PetscFunctionBegin;
2271   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2272     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2273   }
2274 
2275   /* convert A and B to Elemental matrices Ae and Be */
2276   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2277   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2278 
2279   /* Ce = Ae*Be */
2280   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2281   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2282 
2283   /* convert Ce to C */
2284   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2285 
2286   /* create data structure for reuse Cdense */
2287   ierr = PetscNew(&ab);CHKERRQ(ierr);
2288   c                  = (Mat_MPIDense*)(*C)->data;
2289   c->abdense         = ab;
2290 
2291   ab->Ae             = Ae;
2292   ab->Be             = Be;
2293   ab->Ce             = Ce;
2294   ab->destroy        = (*C)->ops->destroy;
2295   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2296   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2297   PetscFunctionReturn(0);
2298 }
2299 
/* Combined driver for C = A*B via Elemental: runs the symbolic phase on
   MAT_INITIAL_MATRIX (which already performs the numeric product), or only the
   numeric phase on reuse; each path is wrapped in the matching log event. */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2316 #endif
2317 
2318