/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
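
/*
   Usage sketch (illustrative, not part of this file): obtain the local
   sequential block of an assembled parallel dense matrix and query its
   leading dimension. The matrix A and the variable names are assumptions.

     Mat      Aloc;
     PetscInt lda;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatDenseGetLDA(Aloc,&lda);CHKERRQ(ierr); (* leading dimension of the local column-major array *)
*/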

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
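
/*
   Note on the caching above: the diagonal block is composed on A under the
   key "DiagonalBlock", so repeated calls return the same borrowed matrix and
   the MatDestroy() drops only the extra reference. A hedged usage sketch,
   with A assumed square and assembled:

     Mat D;
     ierr = MatGetDiagonalBlock(A,&D);CHKERRQ(ierr); (* D is owned by A; do not destroy it *)
*/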

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
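
/*
   Usage sketch (illustrative): entries destined for rows owned by other
   processes are stashed above rather than applied, so every MatSetValues()
   phase must be closed with assembly before the matrix is used. The index
   values here are assumptions.

     PetscInt    i = 0, j = 0;    (* global indices, possibly off-process *)
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/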

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only submatrices with the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* Really need to test the row and column sizes of *B; a reused submatrix
       of the wrong size should raise PETSC_ERR_ARG_SIZ */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
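
/*
   Usage sketch (illustrative): since no redistribution is performed above,
   the row index set must select only locally owned rows. One hedged way to
   build conforming index sets from the matrix's own layout:

     IS       isrow,iscol;
     Mat      S;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/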

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
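
/*
   Usage sketch (illustrative): zero a set of global rows (they may be owned
   by any process), place 1.0 on the diagonal, and fix the right-hand side b
   consistently with the solution vector x so the zeroed rows enforce
   x_i = b_i. The row numbers are assumptions.

     PetscInt rows[2] = {0,1};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/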

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
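
/*
   Each process stores its rows of the matrix in full (the local block is
   m_local x N), so the multiply above first gathers the entire input vector
   into the work vector lvec and then a single sequential multiply produces
   the locally owned part of the result. The transpose variants below reverse
   the pattern: a local transpose multiply followed by a SCATTER_REVERSE with
   ADD_VALUES to sum the per-process contributions.
*/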

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {
      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
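
/*
   Usage sketch (illustrative):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); (* allocates At = A^T *)
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);  (* replaces A with A^T *)
*/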

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode    ierr;
  PetscScalar       *x;
  const PetscScalar *a;
  PetscInt          lda;

  PetscFunctionBegin;
  if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(x,a+col*lda,A->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        MatMatTransposeMult_MPIDense_MPIDense,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note: For now, when data is specified, this assumes the user correctly
     allocates the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
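
/*
   Usage sketch (illustrative): the usual route into the preallocation routine
   above is MatCreateDense(); passing NULL for the data argument lets PETSc
   allocate the local column-major storage. M and N are assumed global sizes.

     Mat A;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
*/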

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1342 
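/*
   A minimal usage sketch of the conversion registered above (requires a PETSc
   build configured with Elemental); A is assumed to be an assembled MATMPIDENSE
   matrix:

     Mat Ae;
     MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae);
     ... use Ae ...
     MatDestroy(&Ae);
*/
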
1343 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1344 {
1345   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1354 {
1355   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1364 {
1365   PetscErrorCode ierr;
1366   Mat_MPIDense   *mat;
1367   PetscInt       m,nloc,N;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1371   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1372   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1373     PetscInt sum;
1374 
1375     if (n == PETSC_DECIDE) {
1376       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1377     }
1378     /* Check sum(n) = N */
1379     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1380     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1381 
1382     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1383   }
1384 
1385   /* numeric phase */
1386   mat = (Mat_MPIDense*)(*outmat)->data;
1387   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1388   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
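/*
   A minimal sketch of the concatenation implemented above: each rank contributes
   its sequential dense block seqA (a hypothetical name), and the parallel result
   can be reused on later calls with the same layout:

     Mat C = NULL;
     MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     MatCreateMPIMatConcatenateSeqMat(comm,seqA,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
*/
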
1393 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1394 {
1395   Mat_MPIDense   *a;
1396   PetscErrorCode ierr;
1397 
1398   PetscFunctionBegin;
1399   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1400   mat->data = (void*)a;
1401   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1402 
1403   mat->insertmode = NOT_SET_VALUES;
1404   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1405   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1406 
1407   /* build cache for off-process entries formed during assembly */
1408   a->donotstash = PETSC_FALSE;
1409 
1410   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1411 
1412   /* data structures used for matrix-vector multiply */
1413   a->lvec        = 0;
1414   a->Mvctx       = 0;
1415   a->roworiented = PETSC_TRUE;
1416 
1417   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1424 #if defined(PETSC_HAVE_ELEMENTAL)
1425   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1426 #endif
1427   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1429   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1430   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1431 
1432   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1437   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 /*MC
1442    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1443 
1444    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1445    and MATMPIDENSE otherwise.
1446 
1447    Options Database Keys:
1448 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1449 
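   Example, selecting the type from the options database (a minimal sketch; A, m,
   n, M, and N are assumed to be set up by the caller):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetFromOptions(A);   /* -mat_type dense picks MATSEQDENSE or MATMPIDENSE */
.ve
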
1450   Level: beginner
1451 
1452 
1453 .seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1454 M*/
1455 
1456 /*@C
1457    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1458 
1459    Not Collective
1460 
1461    Input Parameters:
1462 +  B - the matrix
1463 -  data - optional location of matrix data.  Set data=NULL for PETSc
1464    to control all matrix memory allocation.
1465 
1466    Notes:
1467    The dense format is fully compatible with standard Fortran 77
1468    storage by columns.
1469 
1470    The data input variable is intended primarily for Fortran programmers
1471    who wish to allocate their own matrix memory space.  Most users should
1472    set data=NULL.
1473 
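   A minimal usage sketch in which PETSc allocates the storage (error checking
   omitted; the sizes are assumed to be set up by the caller):
.vb
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,M,N);
   MatSetType(B,MATMPIDENSE);
   MatMPIDenseSetPreallocation(B,NULL);
.ve
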
1474    Level: intermediate
1475 
1476 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1477 @*/
1478 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1479 {
1480   PetscErrorCode ierr;
1481 
1482   PetscFunctionBegin;
1483   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 /*@
1488    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1489    array provided by the user. This is useful to avoid copying an array
1490    into a matrix.
1491 
1492    Not Collective
1493 
1494    Input Parameters:
1495 +  mat - the matrix
1496 -  array - the array in column major order
1497 
1498    Notes:
1499    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1500    freed when the matrix is destroyed.
1501 
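   A minimal sketch of the place/reset pattern; myarray is a hypothetical
   user-managed, column-major array large enough to hold the local block:
.vb
   MatDensePlaceArray(mat,myarray);   /* mat now operates on myarray */
   /* ... use mat ... */
   MatDenseResetArray(mat);           /* restore the original storage */
.ve
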
1502    Level: developer
1503 
1504 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1505 
1506 @*/
1507 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1508 {
1509   PetscErrorCode ierr;
1510   PetscFunctionBegin;
1511   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1512   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 /*@
1517    MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray()
1518 
1519    Not Collective
1520 
1521    Input Parameter:
1522 .  mat - the matrix
1523 
1524    Notes:
1525    You can only call this after a call to MatDensePlaceArray()
1526 
1527    Level: developer
1528 
1529 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1530 
1531 @*/
1532 PetscErrorCode  MatDenseResetArray(Mat mat)
1533 {
1534   PetscErrorCode ierr;
1535   PetscFunctionBegin;
1536   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1537   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 /*@C
1542    MatCreateDense - Creates a parallel matrix in dense format.
1543 
1544    Collective on MPI_Comm
1545 
1546    Input Parameters:
1547 +  comm - MPI communicator
1548 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1549 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1550 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1551 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1552 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1553    to control all matrix memory allocation.
1554 
1555    Output Parameter:
1556 .  A - the matrix
1557 
1558    Notes:
1559    The dense format is fully compatible with standard Fortran 77
1560    storage by columns.
1561 
1562    The data input variable is intended primarily for Fortran programmers
1563    who wish to allocate their own matrix memory space.  Most users should
1564    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1565 
1566    The user MUST specify either the local or global matrix dimensions
1567    (possibly both).
1568 
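   Example, a minimal sketch in which PETSc chooses the local sizes and
   allocates the storage (error checking omitted):
.vb
   Mat A;
   MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
   /* fill with MatSetValues(), then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
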
1569    Level: intermediate
1570 
1571 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1572 @*/
1573 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1574 {
1575   PetscErrorCode ierr;
1576   PetscMPIInt    size;
1577 
1578   PetscFunctionBegin;
1579   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1580   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1582   if (size > 1) {
1583     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1584     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1585     if (data) {  /* user provided data array, so no need to assemble */
1586       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1587       (*A)->assembled = PETSC_TRUE;
1588     }
1589   } else {
1590     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1591     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1592   }
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1597 {
1598   Mat            mat;
1599   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   *newmat = 0;
1604   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1605   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1606   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1607   a       = (Mat_MPIDense*)mat->data;
1608 
1609   mat->factortype   = A->factortype;
1610   mat->assembled    = PETSC_TRUE;
1611   mat->preallocated = PETSC_TRUE;
1612 
1613   a->size         = oldmat->size;
1614   a->rank         = oldmat->rank;
1615   mat->insertmode = NOT_SET_VALUES;
1616   a->nvec         = oldmat->nvec;
1617   a->donotstash   = oldmat->donotstash;
1618 
1619   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1620   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1621 
1622   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1623   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1624   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1625 
1626   *newmat = mat;
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1631 {
1632   PetscErrorCode ierr;
1633   PetscMPIInt    rank,size;
1634   const PetscInt *rowners;
1635   PetscInt       i,m,n,nz,j,mMax;
1636   PetscScalar    *array,*vals,*vals_ptr;
1637   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1638 
1639   PetscFunctionBegin;
1640   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1641   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1642 
1643   /* determine ownership of rows and columns */
1644   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1645   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1646 
1647   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1648   if (!a->A) {
1649     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1650   }
1651   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1652   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1653   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1654   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1655   if (!rank) {
1656     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1657 
1658     /* read in my part of the matrix numerical values  */
1659     ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1660 
1661     /* insert into matrix row by row (this is why we cannot read directly into array) */
1662     vals_ptr = vals;
1663     for (i=0; i<m; i++) {
1664       for (j=0; j<N; j++) {
1665         array[i + j*m] = *vals_ptr++;
1666       }
1667     }
1668 
1669     /* read in the other processors' parts and ship them out */
1670     for (i=1; i<size; i++) {
1671       nz   = (rowners[i+1] - rowners[i])*N;
1672       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1673       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1674     }
1675   } else {
1676     /* receive numeric values */
1677     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1678 
1679     /* receive message of values */
1680     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1681 
1682     /* insert into matrix row by row (this is why we cannot read directly into array) */
1683     vals_ptr = vals;
1684     for (i=0; i<m; i++) {
1685       for (j=0; j<N; j++) {
1686         array[i + j*m] = *vals_ptr++;
1687       }
1688     }
1689   }
1690   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1691   ierr = PetscFree(vals);CHKERRQ(ierr);
1692   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1698 {
1699   Mat_MPIDense   *a;
1700   PetscScalar    *vals,*svals;
1701   MPI_Comm       comm;
1702   MPI_Status     status;
1703   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1704   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1705   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1706   PetscInt       i,nz,j,rstart,rend;
1707   int            fd;
1708   PetscBool      isbinary;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1713   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1714 
1715   /* force binary viewer to load .info file if it has not yet done so */
1716   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1717   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1718   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1719   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1720   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1721   if (!rank) {
1722     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
1723     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1724   }
1725   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1726   M    = header[1]; N = header[2]; nz = header[3];
1727 
1728   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1729   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1730   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1731 
1732   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1733   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1734 
1735   /*
1736        Handle case where matrix is stored on disk as a dense matrix
1737   */
1738   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1739     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1740     PetscFunctionReturn(0);
1741   }
1742 
1743   /* determine ownership of all rows */
1744   if (newmat->rmap->n < 0) {
1745     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1746   } else {
1747     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1748   }
1749   if (newmat->cmap->n < 0) {
1750     n = PETSC_DECIDE;
1751   } else {
1752     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1753   }
1754 
1755   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1756   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1757   rowners[0] = 0;
1758   for (i=2; i<=size; i++) {
1759     rowners[i] += rowners[i-1];
1760   }
1761   rstart = rowners[rank];
1762   rend   = rowners[rank+1];
1763 
1764   /* distribute row lengths to all processors */
1765   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1766   if (!rank) {
1767     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1768     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
1769     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1770     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1771     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1772     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1773   } else {
1774     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1775   }
1776 
1777   if (!rank) {
1778     /* calculate the number of nonzeros on each processor */
1779     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1780     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1781     for (i=0; i<size; i++) {
1782       for (j=rowners[i]; j< rowners[i+1]; j++) {
1783         procsnz[i] += rowlengths[j];
1784       }
1785     }
1786     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1787 
1788     /* determine max buffer needed and allocate it */
1789     maxnz = 0;
1790     for (i=0; i<size; i++) {
1791       maxnz = PetscMax(maxnz,procsnz[i]);
1792     }
1793     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1794 
1795     /* read in my part of the matrix column indices  */
1796     nz   = procsnz[0];
1797     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1798     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1799 
1800     /* read in everyone else's parts and ship them off */
1801     for (i=1; i<size; i++) {
1802       nz   = procsnz[i];
1803       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1804       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1805     }
1806     ierr = PetscFree(cols);CHKERRQ(ierr);
1807   } else {
1808     /* determine buffer space needed for message */
1809     nz = 0;
1810     for (i=0; i<m; i++) {
1811       nz += ourlens[i];
1812     }
1813     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1814 
1815     /* receive message of column indices */
1816     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1817     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1818     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1819   }
1820 
1821   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1822   a = (Mat_MPIDense*)newmat->data;
1823   if (!a->A) {
1824     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1825   }
1826 
1827   if (!rank) {
1828     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1829 
1830     /* read in my part of the matrix numerical values  */
1831     nz   = procsnz[0];
1832     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1833 
1834     /* insert into matrix */
1835     jj      = rstart;
1836     smycols = mycols;
1837     svals   = vals;
1838     for (i=0; i<m; i++) {
1839       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1840       smycols += ourlens[i];
1841       svals   += ourlens[i];
1842       jj++;
1843     }
1844 
1845     /* read in the other processors' parts and ship them out */
1846     for (i=1; i<size; i++) {
1847       nz   = procsnz[i];
1848       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1849       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1850     }
1851     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1852   } else {
1853     /* receive numeric values */
1854     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1855 
1856     /* receive message of values */
1857     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1858     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1859     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1860 
1861     /* insert into matrix */
1862     jj      = rstart;
1863     smycols = mycols;
1864     svals   = vals;
1865     for (i=0; i<m; i++) {
1866       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1867       smycols += ourlens[i];
1868       svals   += ourlens[i];
1869       jj++;
1870     }
1871   }
1872   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1873   ierr = PetscFree(vals);CHKERRQ(ierr);
1874   ierr = PetscFree(mycols);CHKERRQ(ierr);
1875   ierr = PetscFree(rowners);CHKERRQ(ierr);
1876 
1877   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1878   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879   PetscFunctionReturn(0);
1880 }
1881 
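/*
   A minimal sketch of driving the loader above through the public interface;
   the filename "matrix.dat" is hypothetical:

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIDENSE);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/
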
1882 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1883 {
1884   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1885   Mat            a,b;
1886   PetscBool      flg;
1887   PetscErrorCode ierr;
1888 
1889   PetscFunctionBegin;
1890   a    = matA->A;
1891   b    = matB->A;
1892   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1893   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1898 {
1899   PetscErrorCode        ierr;
1900   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1901   Mat_TransMatMultDense *atb = a->atbdense;
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1905   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1906   ierr = PetscFree(atb);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1911 {
1912   PetscErrorCode        ierr;
1913   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1914   Mat_MatTransMultDense *abt = a->abtdense;
1915 
1916   PetscFunctionBegin;
1917   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1918   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1919   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1920   ierr = PetscFree(abt);CHKERRQ(ierr);
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1925 {
1926   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1927   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1928   Mat_TransMatMultDense *atb = c->atbdense;
1929   PetscErrorCode ierr;
1930   MPI_Comm       comm;
1931   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1932   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1933   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1934   PetscScalar    _DOne=1.0,_DZero=0.0;
1935   PetscBLASInt   am,an,bn,aN;
1936   const PetscInt *ranges;
1937 
1938   PetscFunctionBegin;
1939   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1940   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1941   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1942 
1943   /* compute atbarray = aseq^T * bseq */
1944   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1945   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1946   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1947   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1948   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1949 
1950   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1951   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1952 
1953   /* arrange atbarray into sendbuf */
1954   k = 0;
1955   for (proc=0; proc<size; proc++) {
1956     for (j=0; j<cN; j++) {
1957       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1958     }
1959   }
1960   /* sum the atbarray contributions into the local part of C */
1961   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1962   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1963   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1968 {
1969   PetscErrorCode        ierr;
1970   Mat                   Cdense;
1971   MPI_Comm              comm;
1972   PetscMPIInt           size;
1973   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1974   Mat_MPIDense          *c;
1975   Mat_TransMatMultDense *atb;
1976 
1977   PetscFunctionBegin;
1978   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1979   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1980     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1981   }
1982 
1983   /* create matrix product Cdense */
1984   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1985   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1986   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1987   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1988   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1989   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1990   *C   = Cdense;
1991 
1992   /* create data structure for reuse of Cdense */
1993   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1994   ierr = PetscNew(&atb);CHKERRQ(ierr);
1995   cM = Cdense->rmap->N;
1996   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1997 
1998   c                    = (Mat_MPIDense*)Cdense->data;
1999   c->atbdense          = atb;
2000   atb->destroy         = Cdense->ops->destroy;
2001   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2006 {
2007   PetscErrorCode ierr;
2008 
2009   PetscFunctionBegin;
2010   if (scall == MAT_INITIAL_MATRIX) {
2011     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2012   }
2013   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2014   PetscFunctionReturn(0);
2015 }
2016 
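/*
   A minimal sketch of computing C = A^T * B with the routines above; the second
   call reuses the data structures created by the first:

     Mat C;
     MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
     MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
*/
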
2017 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2018 {
2019   PetscErrorCode        ierr;
2020   Mat                   Cdense;
2021   MPI_Comm              comm;
2022   PetscMPIInt           i, size;
2023   PetscInt              maxRows, bufsiz;
2024   Mat_MPIDense          *c;
2025   PetscMPIInt           tag;
2026   const char            *algTypes[2] = {"allgatherv","cyclic"};
2027   PetscInt              alg, nalg = 2;
2028   Mat_MatTransMultDense *abt;
2029 
2030   PetscFunctionBegin;
2031   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2032   alg = 0; /* default is allgatherv */
2033   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2034   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2035   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2036   if (A->cmap->N != B->cmap->N) {
2037     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2038   }
2039 
2040   /* create matrix product Cdense */
2041   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2042   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2043   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2044   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2045   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2046   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2047   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2048   *C   = Cdense;
2049 
2050   /* create data structure for reuse of Cdense */
2051   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2052   ierr = PetscNew(&abt);CHKERRQ(ierr);
2053   abt->tag = tag;
2054   abt->alg = alg;
2055   switch (alg) {
2056   case 1:
2057     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2058     bufsiz = A->cmap->N * maxRows;
2059     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2060     break;
2061   default:
2062     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2063     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2064     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2065     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2066     break;
2067   }
2068 
2069   c                    = (Mat_MPIDense*)Cdense->data;
2070   c->abtdense          = abt;
2071   abt->destroy         = Cdense->ops->destroy;
2072   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2077 {
2078   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2079   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2080   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2081   Mat_MatTransMultDense *abt = c->abtdense;
2082   PetscErrorCode ierr;
2083   MPI_Comm       comm;
2084   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2085   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2086   PetscInt       i,cK=A->cmap->N,k,j,bn;
2087   PetscScalar    _DOne=1.0,_DZero=0.0;
2088   PetscBLASInt   cm, cn, ck;
2089   MPI_Request    reqs[2];
2090   const PetscInt *ranges;
2091 
2092   PetscFunctionBegin;
2093   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2094   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2095   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2096 
2097   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2098   bn = B->rmap->n;
2099   if (bseq->lda == bn) {
2100     sendbuf = bseq->v;
2101   } else {
2102     sendbuf = abt->buf[0];
2103     for (k = 0, i = 0; i < cK; i++) {
2104       for (j = 0; j < bn; j++, k++) {
2105         sendbuf[k] = bseq->v[i * bseq->lda + j];
2106       }
2107     }
2108   }
2109   if (size > 1) {
2110     sendto = (rank + size - 1) % size;
2111     recvfrom = (rank + size + 1) % size;
2112   } else {
2113     sendto = recvfrom = 0;
2114   }
2115   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2116   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2117   recvisfrom = rank;
2118   for (i = 0; i < size; i++) {
2119     /* we have finished receiving and sending; bufs can be read/modified */
2120     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
2121     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2122 
2123     if (nextrecvisfrom != rank) {
2124       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2125       sendsiz = cK * bn;
2126       recvsiz = cK * nextbn;
2127       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2128       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2129       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2130     }
2131 
2132     /* local aseq * sendbuf^T */
2133     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2134     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2135     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2136 
2137     if (nextrecvisfrom != rank) {
2138       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2139       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2140     }
2141     bn = nextbn;
2142     recvisfrom = nextrecvisfrom;
2143     sendbuf = recvbuf;
2144   }
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2149 {
2150   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2151   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2152   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2153   Mat_MatTransMultDense *abt = c->abtdense;
2154   PetscErrorCode ierr;
2155   MPI_Comm       comm;
2156   PetscMPIInt    rank,size;
2157   PetscScalar    *sendbuf, *recvbuf;
2158   PetscInt       i,cK=A->cmap->N,k,j,bn;
2159   PetscScalar    _DOne=1.0,_DZero=0.0;
2160   PetscBLASInt   cm, cn, ck;
2161 
2162   PetscFunctionBegin;
2163   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2164   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2166 
2167   /* copy transpose of B into buf[0] */
2168   bn      = B->rmap->n;
2169   sendbuf = abt->buf[0];
2170   recvbuf = abt->buf[1];
2171   for (k = 0, j = 0; j < bn; j++) {
2172     for (i = 0; i < cK; i++, k++) {
2173       sendbuf[k] = bseq->v[i * bseq->lda + j];
2174     }
2175   }
2176   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2177   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2178   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2179   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2180   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2185 {
2186   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2187   Mat_MatTransMultDense *abt = c->abtdense;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   switch (abt->alg) {
2192   case 1:
2193     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2194     break;
2195   default:
2196     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2197     break;
2198   }
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2203 {
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   if (scall == MAT_INITIAL_MATRIX) {
2208     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2209   }
2210   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
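/*
   A minimal sketch of computing C = A * B^T with the routines above; the
   communication algorithm can be chosen at runtime with
   -matmattransmult_mpidense_mpidense_via allgatherv|cyclic:

     Mat C;
     MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
*/
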
2214 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2215 {
2216   PetscErrorCode   ierr;
2217   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2218   Mat_MatMultDense *ab = a->abdense;
2219 
2220   PetscFunctionBegin;
2221   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2222   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2223   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2224 
2225   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2226   ierr = PetscFree(ab);CHKERRQ(ierr);
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 #if defined(PETSC_HAVE_ELEMENTAL)
2231 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2232 {
2233   PetscErrorCode   ierr;
2234   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2235   Mat_MatMultDense *ab=c->abdense;
2236 
2237   PetscFunctionBegin;
2238   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2239   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2240   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2241   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2246 {
2247   PetscErrorCode   ierr;
2248   Mat              Ae,Be,Ce;
2249   Mat_MPIDense     *c;
2250   Mat_MatMultDense *ab;
2251 
2252   PetscFunctionBegin;
2253   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2254     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2255   }
2256 
2257   /* convert A and B to Elemental matrices Ae and Be */
2258   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2259   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2260 
2261   /* Ce = Ae*Be */
2262   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2263   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2264 
2265   /* convert Ce to C */
2266   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2267 
2268   /* create data structure for reuse of C */
2269   ierr = PetscNew(&ab);CHKERRQ(ierr);
2270   c                  = (Mat_MPIDense*)(*C)->data;
2271   c->abdense         = ab;
2272 
2273   ab->Ae             = Ae;
2274   ab->Be             = Be;
2275   ab->Ce             = Ce;
2276   ab->destroy        = (*C)->ops->destroy;
2277   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2278   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2283 {
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2288     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2289     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2290     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2291   } else {
2292     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2293     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2294     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2295   }
2296   PetscFunctionReturn(0);
2297 }
2298 #endif
2299 
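/*
   A minimal sketch of the dense-dense product above (available only when PETSc
   is configured with Elemental):

     Mat C;
     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
*/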
2300