/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
              stores this process's rows of the operator; for a MATSEQDENSE matrix, returns
              the matrix itself.

    Input Parameter:
.      A - the MATMPIDENSE or MATSEQDENSE matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate
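
    Example usage (a minimal sketch; error checking with CHKERRQ() omitted):
.vb
      Mat B;
      MatDenseGetLocalMatrix(A,&B);
.ve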

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
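    /* the m x m diagonal block occupies columns rstart..rstart+m-1 of the column-major local
       array (this relies on the local block's leading dimension being m, as it is here) */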
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
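  /* v is row-major (v[i*n+j]) when roworiented, otherwise column-major (v[i+j*m]); rows owned
     by other processes are cached in the stash unless MAT_IGNORE_OFF_PROC_ENTRIES was set */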
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK; currently the submatrix must use the same row partitioning as
     the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

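  /* the destination columns are written contiguously (this assumes the local block of newmat
     has its leading dimension equal to nrows, as a freshly preallocated block does), while
     each source column is located through the source block's own lda */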
  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode; both values are
     nonzero, so a bitwise OR across all ranks reveals any mix of the two */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
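  /* re-insert the diagonal: the entry for local row lrows[i] sits at global column
     rstart+lrows[i] of the column-major local array, at offset lrows[i] + lda*(rstart+lrows[i]) */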
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
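  /* y = A*x: gather the distributed x into the sequential work vector lvec (one copy of all
     N entries), then apply the local block */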
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
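  /* y = A^T*x: apply the transpose of the local block to this process's part of x, then
     accumulate the length-N partial results into y with a reverse scatter */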
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
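  /* global diagonal entry rstart+i lives in local column rstart+i of the column-major local
     array (whose leading dimension is m here), i.e., at offset radd + i*m + i */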
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
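  /* left scaling multiplies local row i by ll[i], a stride-m walk through the column-major
     storage; right scaling first gathers rr into the work vector lvec, then scales whole columns */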
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
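      /* 1-norm = max column absolute sum: each process sums |a_ij| over its local rows for
         every global column, then the per-column sums are added across processes */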
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
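  /* column j of the local m x N block holds this process's rows of global column j; each
     such column becomes row j of the transpose, with global column indices rstart..rstart+m-1 */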
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is non-NULL, this assumes the user correctly
     allocated the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
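
/*
   A minimal usage sketch for the preallocation above (assumptions: comm is an MPI
   communicator and m,n are the desired local sizes; error checking with CHKERRQ() omitted):

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,NULL);

   Passing NULL lets PETSc allocate the local column-major storage; a non-NULL pointer
   supplies user-allocated storage of size at least (local rows) x (global columns).
*/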

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1326   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1327   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1328   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1329   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1330   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1331   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1332 
1333   if (reuse == MAT_INPLACE_MATRIX) {
1334     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1335   } else {
1336     *newmat = mat_elemental;
1337   }
1338   PetscFunctionReturn(0);
1339 }
1340 #endif
1341 
1342 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1343 {
1344   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1353 {
1354   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1355   PetscErrorCode ierr;
1356 
1357   PetscFunctionBegin;
1358   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
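
/*
   Illustrative sketch (compiled out): the public MatDenseGetColumn()/MatDenseRestoreColumn()
   pair dispatches to the _MPIDense routines above. Each process sees only the rows of the
   requested column that it owns. A is assumed to be an assembled MATMPIDENSE matrix.
*/
#if 0
static PetscErrorCode ExampleScaleColumn(Mat A,PetscInt col,PetscScalar alpha)
{
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       i,m;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetColumn(A,col,&v);CHKERRQ(ierr);
  for (i=0; i<m; i++) v[i] *= alpha;        /* scale the locally owned part of the column */
  ierr = MatDenseRestoreColumn(A,&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif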
1361 
1362 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1363 {
1364   PetscErrorCode ierr;
1365   Mat_MPIDense   *mat;
1366   PetscInt       m,nloc,N;
1367 
1368   PetscFunctionBegin;
1369   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1370   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1371   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1372     PetscInt sum;
1373 
1374     if (n == PETSC_DECIDE) {
1375       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1376     }
1377     /* Check sum(n) = N */
1378     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1379     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1380 
1381     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1382   }
1383 
1384   /* numeric phase */
1385   mat = (Mat_MPIDense*)(*outmat)->data;
1386   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1387   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
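
/*
   Illustrative sketch (compiled out): stacking each process's sequential dense block into
   one parallel MATMPIDENSE matrix via the public MatCreateMPIMatConcatenateSeqMat(), which
   dispatches to the routine above for dense input.
*/
#if 0
static PetscErrorCode ExampleConcatenate(MPI_Comm comm,Mat seqA,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PETSC_DECIDE lets PETSc pick the local column size; MAT_INITIAL_MATRIX creates *A */
  ierr = MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif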
1391 
1392 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1393 {
1394   Mat_MPIDense   *a;
1395   PetscErrorCode ierr;
1396 
1397   PetscFunctionBegin;
1398   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1399   mat->data = (void*)a;
1400   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1401 
1402   mat->insertmode = NOT_SET_VALUES;
1403   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1404   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1405 
1406   /* build cache for off-process entries formed during assembly */
1407   a->donotstash = PETSC_FALSE;
1408 
1409   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1410 
1411   /* data used for matrix-vector products */
1412   a->lvec        = 0;
1413   a->Mvctx       = 0;
1414   a->roworiented = PETSC_TRUE;
1415 
1416   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1417   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1423 #if defined(PETSC_HAVE_ELEMENTAL)
1424   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1425 #endif
1426   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1427   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1429   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1430 
1431   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1432   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1436   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 /*MC
1441    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1442 
1443    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1444    and MATMPIDENSE otherwise.
1445 
1446    Options Database Keys:
1447 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1448 
1449   Level: beginner
1450 
1451 
1452 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1453 M*/
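
/*
   Illustrative sketch (compiled out): selecting the "dense" type at runtime, as described in
   the manual page above. With -mat_type dense the matrix becomes MATSEQDENSE on one process
   and MATMPIDENSE on several; the dimensions m,n are hypothetical inputs.
*/
#if 0
static PetscErrorCode ExampleDenseFromOptions(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);  /* honors -mat_type dense */
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif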
1454 
1455 /*@C
1456    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1457 
1458    Not Collective
1459 
1460    Input Parameters:
1461 +  B - the matrix
1462 -  data - optional location of matrix data.  Set data=NULL for PETSc
1463    to control all matrix memory allocation.
1464 
1465    Notes:
1466    The dense format is fully compatible with standard Fortran 77
1467    storage by columns.
1468 
1469    The data input variable is intended primarily for Fortran programmers
1470    who wish to allocate their own matrix memory space.  Most users should
1471    set data=NULL.
1472 
1473    Level: intermediate
1474 
1475 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1476 @*/
1477 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1478 {
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
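
/*
   Illustrative sketch (compiled out): preallocating a MATMPIDENSE matrix with a
   user-provided, column-major local array. data is a hypothetical caller-owned buffer of
   at least (local rows) x (global columns) entries; it is not freed by PETSc.
*/
#if 0
static PetscErrorCode ExamplePreallocate(MPI_Comm comm,PetscInt m,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); /* pass NULL to let PETSc allocate */
  PetscFunctionReturn(0);
}
#endif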
1485 
1486 /*@
1487    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1488    array provided by the user. This is useful to avoid copying an array
1489    into a matrix.
1490 
1491    Not Collective
1492 
1493    Input Parameters:
1494 +  mat - the matrix
1495 -  array - the array in column major order
1496 
1497    Notes:
1498    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1499    freed when the matrix is destroyed.
1500 
1501    Level: developer
1502 
1503 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1504 
1505 @*/
1506 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1507 {
1508   PetscErrorCode ierr;
1509   PetscFunctionBegin;
1510   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1511   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 /*@
1516    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1517 
1518    Not Collective
1519 
1520    Input Parameter:
1521 .  mat - the matrix
1522 
1523    Notes:
1524    You can only call this after a call to MatDensePlaceArray()
1525 
1526    Level: developer
1527 
1528 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1529 
1530 @*/
1531 PetscErrorCode  MatDenseResetArray(Mat mat)
1532 {
1533   PetscErrorCode ierr;
1534   PetscFunctionBegin;
1535   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1536   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
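
/*
   Illustrative sketch (compiled out): temporarily operating on a user buffer without
   copying, by pairing MatDensePlaceArray() with MatDenseResetArray(). buf is a hypothetical
   caller-owned, column-major array of at least (local rows) x (global columns) entries.
*/
#if 0
static PetscErrorCode ExamplePlaceAndReset(Mat A,PetscScalar *buf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(A,buf);CHKERRQ(ierr);  /* A now wraps buf */
  /* ... use A here, e.g. MatMult() or MatView() ... */
  ierr = MatDenseResetArray(A);CHKERRQ(ierr);      /* restore A's original storage */
  PetscFunctionReturn(0);
}
#endif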
1539 
1540 /*@C
1541    MatCreateDense - Creates a parallel matrix in dense format.
1542 
1543    Collective
1544 
1545    Input Parameters:
1546 +  comm - MPI communicator
1547 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1548 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1549 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1550 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1551 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1552    to control all matrix memory allocation.
1553 
1554    Output Parameter:
1555 .  A - the matrix
1556 
1557    Notes:
1558    The dense format is fully compatible with standard Fortran 77
1559    storage by columns.
1560 
1561    The data input variable is intended primarily for Fortran programmers
1562    who wish to allocate their own matrix memory space.  Most users should
1563    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1564 
1565    The user MUST specify either the local or global matrix dimensions
1566    (possibly both).
1567 
1568    Level: intermediate
1569 
1570 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1571 @*/
1572 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1573 {
1574   PetscErrorCode ierr;
1575   PetscMPIInt    size;
1576 
1577   PetscFunctionBegin;
1578   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1579   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1581   if (size > 1) {
1582     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1583     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1584     if (data) {  /* user provided data array, so no need to assemble */
1585       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1586       (*A)->assembled = PETSC_TRUE;
1587     }
1588   } else {
1589     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1590     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1591   }
1592   PetscFunctionReturn(0);
1593 }
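
/*
   Illustrative sketch (compiled out): creating and filling a parallel dense matrix with
   MatCreateDense(). Each process inserts only its locally owned rows; the global
   dimensions M,N used here are hypothetical.
*/
#if 0
static PetscErrorCode ExampleCreateDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,M = 10,N = 10;
  PetscScalar    v;

  PetscFunctionBegin;
  ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = (PetscScalar)(i + j);
      ierr = MatSetValues(*A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif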
1594 
1595 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1596 {
1597   Mat            mat;
1598   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1599   PetscErrorCode ierr;
1600 
1601   PetscFunctionBegin;
1602   *newmat = 0;
1603   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1604   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1605   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1606   a       = (Mat_MPIDense*)mat->data;
1607 
1608   mat->factortype   = A->factortype;
1609   mat->assembled    = PETSC_TRUE;
1610   mat->preallocated = PETSC_TRUE;
1611 
1612   a->size         = oldmat->size;
1613   a->rank         = oldmat->rank;
1614   mat->insertmode = NOT_SET_VALUES;
1615   a->nvec         = oldmat->nvec;
1616   a->donotstash   = oldmat->donotstash;
1617 
1618   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1619   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1620 
1621   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1622   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1623   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1624 
1625   *newmat = mat;
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1630 {
1631   PetscErrorCode ierr;
1632   PetscMPIInt    rank,size;
1633   const PetscInt *rowners;
1634   PetscInt       i,m,n,nz,j,mMax;
1635   PetscScalar    *array,*vals,*vals_ptr;
1636   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1637 
1638   PetscFunctionBegin;
1639   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1640   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1641 
1642   /* determine ownership of rows and columns */
1643   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1644   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1645 
1646   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1647   if (!a->A) {
1648     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1649   }
1650   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1651   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1652   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1653   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1654   if (!rank) {
1655     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1656 
1657     /* read in my part of the matrix numerical values  */
1658     ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1659 
1660     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1661     vals_ptr = vals;
1662     for (i=0; i<m; i++) {
1663       for (j=0; j<N; j++) {
1664         array[i + j*m] = *vals_ptr++;
1665       }
1666     }
1667 
1668     /* read in the other processes' parts and ship them out */
1669     for (i=1; i<size; i++) {
1670       nz   = (rowners[i+1] - rowners[i])*N;
1671       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1672       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1673     }
1674   } else {
1675     /* receive numeric values */
1676     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1677 
1678     /* receive message of values */
1679     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1680 
1681     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1682     vals_ptr = vals;
1683     for (i=0; i<m; i++) {
1684       for (j=0; j<N; j++) {
1685         array[i + j*m] = *vals_ptr++;
1686       }
1687     }
1688   }
1689   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1690   ierr = PetscFree(vals);CHKERRQ(ierr);
1691   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 static PetscErrorCode MatLoad_MPIDense_Binary(Mat newmat,PetscViewer viewer)
1697 {
1698   Mat_MPIDense   *a;
1699   PetscScalar    *vals,*svals;
1700   MPI_Comm       comm;
1701   MPI_Status     status;
1702   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1703   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1704   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1705   PetscInt       i,nz,j,rstart,rend;
1706   int            fd;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1711   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1712   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1713   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1714   if (!rank) {
1715     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
1716     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1717   }
1718   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1719   M    = header[1]; N = header[2]; nz = header[3];
1720 
1721   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1722   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1723   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1724 
1725   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1726   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1727 
1728   /*
1729        Handle case where matrix is stored on disk as a dense matrix
1730   */
1731   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1732     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1733     PetscFunctionReturn(0);
1734   }
1735 
1736   /* determine ownership of all rows */
1737   if (newmat->rmap->n < 0) {
1738     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1739   } else {
1740     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1741   }
1742   if (newmat->cmap->n < 0) {
1743     n = PETSC_DECIDE;
1744   } else {
1745     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1746   }
1747 
1748   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1749   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1750   rowners[0] = 0;
1751   for (i=2; i<=size; i++) {
1752     rowners[i] += rowners[i-1];
1753   }
1754   rstart = rowners[rank];
1755   rend   = rowners[rank+1];
1756 
1757   /* distribute row lengths to all processors */
1758   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1759   if (!rank) {
1760     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1761     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
1762     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1763     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1764     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1765     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1766   } else {
1767     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1768   }
1769 
1770   if (!rank) {
1771     /* calculate the number of nonzeros on each processor */
1772     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
1773     for (i=0; i<size; i++) {
1774       for (j=rowners[i]; j< rowners[i+1]; j++) {
1775         procsnz[i] += rowlengths[j];
1776       }
1777     }
1778     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1779 
1780     /* determine max buffer needed and allocate it */
1781     maxnz = 0;
1782     for (i=0; i<size; i++) {
1783       maxnz = PetscMax(maxnz,procsnz[i]);
1784     }
1785     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1786 
1787     /* read in my part of the matrix column indices  */
1788     nz   = procsnz[0];
1789     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1790     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1791 
1792     /* read in everyone else's parts and ship them off */
1793     for (i=1; i<size; i++) {
1794       nz   = procsnz[i];
1795       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1796       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1797     }
1798     ierr = PetscFree(cols);CHKERRQ(ierr);
1799   } else {
1800     /* determine buffer space needed for message */
1801     nz = 0;
1802     for (i=0; i<m; i++) {
1803       nz += ourlens[i];
1804     }
1805     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1806 
1807     /* receive message of column indices */
1808     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1809     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1810     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1811   }
1812 
1813   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1814   a = (Mat_MPIDense*)newmat->data;
1815   if (!a->A) {
1816     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1817   }
1818 
1819   if (!rank) {
1820     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1821 
1822     /* read in my part of the matrix numerical values  */
1823     nz   = procsnz[0];
1824     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1825 
1826     /* insert into matrix */
1827     jj      = rstart;
1828     smycols = mycols;
1829     svals   = vals;
1830     for (i=0; i<m; i++) {
1831       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1832       smycols += ourlens[i];
1833       svals   += ourlens[i];
1834       jj++;
1835     }
1836 
1837     /* read in the other processes' parts and ship them out */
1838     for (i=1; i<size; i++) {
1839       nz   = procsnz[i];
1840       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1841       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1842     }
1843     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1844   } else {
1845     /* receive numeric values */
1846     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1847 
1848     /* receive message of values */
1849     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1850     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1851     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1852 
1853     /* insert into matrix */
1854     jj      = rstart;
1855     smycols = mycols;
1856     svals   = vals;
1857     for (i=0; i<m; i++) {
1858       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1859       smycols += ourlens[i];
1860       svals   += ourlens[i];
1861       jj++;
1862     }
1863   }
1864   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1865   ierr = PetscFree(vals);CHKERRQ(ierr);
1866   ierr = PetscFree(mycols);CHKERRQ(ierr);
1867   ierr = PetscFree(rowners);CHKERRQ(ierr);
1868 
1869   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1870   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1875 {
1876   PetscErrorCode ierr;
1877   PetscBool      isbinary;
1878 #if defined(PETSC_HAVE_HDF5)
1879   PetscBool      ishdf5;
1880 #endif
1881 
1882   PetscFunctionBegin;
1883   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
1884   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
1885   /* force binary viewer to load .info file if it has not yet done so */
1886   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1887   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1888 #if defined(PETSC_HAVE_HDF5)
1889   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
1890 #endif
1891   if (isbinary) {
1892     ierr = MatLoad_MPIDense_Binary(newMat,viewer);CHKERRQ(ierr);
1893 #if defined(PETSC_HAVE_HDF5)
1894   } else if (ishdf5) {
1895     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
1896 #endif
1897   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1898   PetscFunctionReturn(0);
1899 }
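
/*
   Illustrative sketch (compiled out): loading a dense matrix from a PETSc binary file;
   MatLoad() dispatches to MatLoad_MPIDense_Binary() above for binary viewers. The file
   name "matrix.dat" is hypothetical.
*/
#if 0
static PetscErrorCode ExampleLoadDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif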
1900 
1901 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1902 {
1903   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1904   Mat            a,b;
1905   PetscBool      flg;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   a    = matA->A;
1910   b    = matB->A;
1911   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1912   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1917 {
1918   PetscErrorCode        ierr;
1919   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1920   Mat_TransMatMultDense *atb = a->atbdense;
1921 
1922   PetscFunctionBegin;
1923   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1924   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1925   ierr = PetscFree(atb);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1930 {
1931   PetscErrorCode        ierr;
1932   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1933   Mat_MatTransMultDense *abt = a->abtdense;
1934 
1935   PetscFunctionBegin;
1936   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1937   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1938   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1939   ierr = PetscFree(abt);CHKERRQ(ierr);
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1944 {
1945   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1946   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1947   Mat_TransMatMultDense *atb = c->atbdense;
1948   PetscErrorCode ierr;
1949   MPI_Comm       comm;
1950   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1951   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1952   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1953   PetscScalar    _DOne=1.0,_DZero=0.0;
1954   PetscBLASInt   am,an,bn,aN;
1955   const PetscInt *ranges;
1956 
1957   PetscFunctionBegin;
1958   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1959   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1960   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1961 
1962   /* compute atbarray = aseq^T * bseq */
1963   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1964   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1965   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1966   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1967   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1968 
1969   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1970   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1971 
1972   /* arrange atbarray into sendbuf */
1973   k = 0;
1974   for (proc=0; proc<size; proc++) {
1975     for (j=0; j<cN; j++) {
1976       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1977     }
1978   }
1979   /* sum all atbarray to local values of C */
1980   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1981   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1982   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1987 {
1988   PetscErrorCode        ierr;
1989   Mat                   Cdense;
1990   MPI_Comm              comm;
1991   PetscMPIInt           size;
1992   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1993   Mat_MPIDense          *c;
1994   Mat_TransMatMultDense *atb;
1995 
1996   PetscFunctionBegin;
1997   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1998   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1999     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2000   }
2001 
2002   /* create matrix product Cdense */
2003   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2004   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2005   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2006   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2007   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2008   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2009   *C   = Cdense;
2010 
2011   /* create data structure for reusing Cdense */
2012   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2013   ierr = PetscNew(&atb);CHKERRQ(ierr);
2014   cM = Cdense->rmap->N;
2015   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
2016 
2017   c                    = (Mat_MPIDense*)Cdense->data;
2018   c->atbdense          = atb;
2019   atb->destroy         = Cdense->ops->destroy;
2020   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2025 {
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   if (scall == MAT_INITIAL_MATRIX) {
2030     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2031   }
2032   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
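
/*
   Illustrative sketch (compiled out): forming C = A^T B through the public
   MatTransposeMatMult(), which dispatches to the symbolic/numeric routines above when A
   and B are MATMPIDENSE. A and B must have identical row layouts.
*/
#if 0
static PetscErrorCode ExampleTransposeMatMult(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  /* after A or B changes values (same layouts), recompute the product in place */
  ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif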
2035 
2036 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2037 {
2038   PetscErrorCode        ierr;
2039   Mat                   Cdense;
2040   MPI_Comm              comm;
2041   PetscMPIInt           i, size;
2042   PetscInt              maxRows, bufsiz;
2043   Mat_MPIDense          *c;
2044   PetscMPIInt           tag;
2045   const char            *algTypes[2] = {"allgatherv","cyclic"};
2046   PetscInt              alg, nalg = 2;
2047   Mat_MatTransMultDense *abt;
2048 
2049   PetscFunctionBegin;
2050   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2051   alg = 0; /* default is allgatherv */
2052   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2053   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2054   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2055   if (A->cmap->N != B->cmap->N) {
2056     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2057   }
2058 
2059   /* create matrix product Cdense */
2060   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2061   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2062   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2063   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2064   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2065   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2067   *C   = Cdense;
2068 
2069   /* create data structure for reusing Cdense */
2070   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2071   ierr = PetscNew(&abt);CHKERRQ(ierr);
2072   abt->tag = tag;
2073   abt->alg = alg;
2074   switch (alg) {
2075   case 1:
2076     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2077     bufsiz = A->cmap->N * maxRows;
2078     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2079     break;
2080   default:
2081     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2082     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2083     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2084     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2085     break;
2086   }
2087 
2088   c                    = (Mat_MPIDense*)Cdense->data;
2089   c->abtdense          = abt;
2090   abt->destroy         = Cdense->ops->destroy;
2091   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2096 {
2097   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2098   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2099   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2100   Mat_MatTransMultDense *abt = c->abtdense;
2101   PetscErrorCode ierr;
2102   MPI_Comm       comm;
2103   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2104   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2105   PetscInt       i,cK=A->cmap->N,k,j,bn;
2106   PetscScalar    _DOne=1.0,_DZero=0.0;
2107   PetscBLASInt   cm, cn, ck;
2108   MPI_Request    reqs[2];
2109   const PetscInt *ranges;
2110 
2111   PetscFunctionBegin;
2112   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2113   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2114   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2115 
2116   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2117   bn = B->rmap->n;
2118   if (bseq->lda == bn) {
2119     sendbuf = bseq->v;
2120   } else {
2121     sendbuf = abt->buf[0];
2122     for (k = 0, i = 0; i < cK; i++) {
2123       for (j = 0; j < bn; j++, k++) {
2124         sendbuf[k] = bseq->v[i * bseq->lda + j];
2125       }
2126     }
2127   }
2128   if (size > 1) {
2129     sendto = (rank + size - 1) % size;
2130     recvfrom = (rank + size + 1) % size;
2131   } else {
2132     sendto = recvfrom = 0;
2133   }
2134   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2135   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2136   recvisfrom = rank;
2137   for (i = 0; i < size; i++) {
2138     /* we have finished receiving and sending; the buffers can be read/modified */
2139     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2140     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2141 
2142     if (nextrecvisfrom != rank) {
2143       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2144       sendsiz = cK * bn;
2145       recvsiz = cK * nextbn;
2146       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2147       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2148       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2149     }
2150 
2151     /* local aseq * sendbuf^T */
2152     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2153     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2154     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2155 
2156     if (nextrecvisfrom != rank) {
2157       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2158       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2159     }
2160     bn = nextbn;
2161     recvisfrom = nextrecvisfrom;
2162     sendbuf = recvbuf;
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2168 {
2169   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2170   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2171   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2172   Mat_MatTransMultDense *abt = c->abtdense;
2173   PetscErrorCode ierr;
2174   MPI_Comm       comm;
2175   PetscMPIInt    rank,size;
2176   PetscScalar    *sendbuf, *recvbuf;
2177   PetscInt       i,cK=A->cmap->N,k,j,bn;
2178   PetscScalar    _DOne=1.0,_DZero=0.0;
2179   PetscBLASInt   cm, cn, ck;
2180 
2181   PetscFunctionBegin;
2182   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2183   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2185 
2186   /* copy transpose of B into buf[0] */
2187   bn      = B->rmap->n;
2188   sendbuf = abt->buf[0];
2189   recvbuf = abt->buf[1];
2190   for (k = 0, j = 0; j < bn; j++) {
2191     for (i = 0; i < cK; i++, k++) {
2192       sendbuf[k] = bseq->v[i * bseq->lda + j];
2193     }
2194   }
2195   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2196   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2197   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2198   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2199   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2204 {
2205   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2206   Mat_MatTransMultDense *abt = c->abtdense;
2207   PetscErrorCode ierr;
2208 
2209   PetscFunctionBegin;
2210   switch (abt->alg) {
2211   case 1:
2212     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2213     break;
2214   default:
2215     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2216     break;
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2222 {
2223   PetscErrorCode ierr;
2224 
2225   PetscFunctionBegin;
2226   if (scall == MAT_INITIAL_MATRIX) {
2227     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2228   }
2229   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
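
/*
   Illustrative sketch (compiled out): forming C = A B^T through the public
   MatMatTransposeMult(). The algorithm used by the routines above can be selected at
   runtime with -matmattransmult_mpidense_mpidense_via allgatherv (default) or cyclic.
*/
#if 0
static PetscErrorCode ExampleMatTransposeMult(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif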
2232 
2233 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2234 {
2235   PetscErrorCode   ierr;
2236   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2237   Mat_MatMultDense *ab = a->abdense;
2238 
2239   PetscFunctionBegin;
2240   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2241   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2242   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2243 
2244   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2245   ierr = PetscFree(ab);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 #if defined(PETSC_HAVE_ELEMENTAL)
2250 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2251 {
2252   PetscErrorCode   ierr;
2253   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2254   Mat_MatMultDense *ab=c->abdense;
2255 
2256   PetscFunctionBegin;
2257   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2258   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2259   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2260   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2265 {
2266   PetscErrorCode   ierr;
2267   Mat              Ae,Be,Ce;
2268   Mat_MPIDense     *c;
2269   Mat_MatMultDense *ab;
2270 
2271   PetscFunctionBegin;
2272   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2273     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D,%D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2274   }
2275 
2276   /* convert A and B to Elemental matrices Ae and Be */
2277   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2278   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2279 
2280   /* Ce = Ae*Be */
2281   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2282   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2283 
2284   /* convert Ce to C */
2285   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2286 
2287   /* create data structure for reusing C */
2288   ierr = PetscNew(&ab);CHKERRQ(ierr);
2289   c                  = (Mat_MPIDense*)(*C)->data;
2290   c->abdense         = ab;
2291 
2292   ab->Ae             = Ae;
2293   ab->Be             = Be;
2294   ab->Ce             = Ce;
2295   ab->destroy        = (*C)->ops->destroy;
2296   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2297   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2302 {
2303   PetscErrorCode ierr;
2304 
2305   PetscFunctionBegin;
2306   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2307     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2308     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2309     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2310   } else {
2311     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2312     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2313     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2314   }
2315   PetscFunctionReturn(0);
2316 }
2317 #endif
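
/*
   Illustrative sketch (compiled out): C = A*B for two MATMPIDENSE matrices goes through
   the public MatMatMult(); when PETSc is configured with Elemental, it is computed by
   the Elemental-based routines above.
*/
#if 0
static PetscErrorCode ExampleMatMatMult(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif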
2318 
2319