xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 37acaa0252901aa880c48d810c76a84f6eb32550)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14               matrix that stores the local (on-process) values. For a sequential matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
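/*
   Example usage, an illustrative sketch (A is assumed to be an existing MATMPIDENSE or
   MATSEQDENSE matrix; lA, lda and v are placeholder names):

     Mat            lA;
     PetscInt       lda;
     PetscScalar    *v;
     PetscErrorCode ierr;

     ierr = MatDenseGetLocalMatrix(A,&lA);CHKERRQ(ierr);
     ierr = MatDenseGetLDA(lA,&lda);CHKERRQ(ierr);
     ierr = MatDenseGetArray(lA,&v);CHKERRQ(ierr);
     ...  read or modify v[i + j*lda] for local row i and column j ...
     ierr = MatDenseRestoreArray(lA,&v);CHKERRQ(ierr);

   No matching restore call is needed for MatDenseGetLocalMatrix() itself; the routine does
   not take a new reference on the returned matrix.
*/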
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows are supported");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
62 {
63   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
66   PetscScalar    *array;
67   MPI_Comm       comm;
68   Mat            B;
69 
70   PetscFunctionBegin;
71   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
72 
73   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
74   if (!B) {
75     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
76     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
77     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
78     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
79     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
80     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
81     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
82     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
84     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
85     *a   = B;
86     ierr = MatDestroy(&B);CHKERRQ(ierr);
87   } else *a = B;
88   PetscFunctionReturn(0);
89 }
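/*
   An illustrative sketch of retrieving the on-process diagonal block (A is assumed to be a
   square MATMPIDENSE matrix with matching row and column ownership, as checked above):

     Mat Ad;

     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);

   The returned block is cached on A (see the PetscObjectCompose() call above), so the caller
   must not destroy it.
*/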
90 
91 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
92 {
93   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
94   PetscErrorCode ierr;
95   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
96   PetscBool      roworiented = A->roworiented;
97 
98   PetscFunctionBegin;
99   for (i=0; i<m; i++) {
100     if (idxm[i] < 0) continue;
101     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
102     if (idxm[i] >= rstart && idxm[i] < rend) {
103       row = idxm[i] - rstart;
104       if (roworiented) {
105         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
106       } else {
107         for (j=0; j<n; j++) {
108           if (idxn[j] < 0) continue;
109           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
110           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
111         }
112       }
113     } else if (!A->donotstash) {
114       mat->assembled = PETSC_FALSE;
115       if (roworiented) {
116         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
117       } else {
118         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
119       }
120     }
121   }
122   PetscFunctionReturn(0);
123 }
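/*
   Values destined for rows owned by other processes are stashed above and are only
   communicated during MatAssemblyBegin()/MatAssemblyEnd(). A minimal sketch of the usual
   insertion pattern (row and column indices and values are placeholders; the row need not
   be owned by the calling process):

     PetscInt    row = 0, cols[2] = {0,1};
     PetscScalar vals[2] = {1.0,2.0};

     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/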
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
188 {
189   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
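/*
   MatDensePlaceArray()/MatDenseResetArray() temporarily substitute a caller-owned buffer for
   the local dense storage; both calls simply forward to the inner sequential matrix. A minimal
   sketch (buf is a placeholder for a column-major array with at least m*N entries, where m is
   the number of local rows and N the number of global columns):

     const PetscScalar *buf = ...;

     ierr = MatDensePlaceArray(A,buf);CHKERRQ(ierr);
     ...  operations on A now use buf as the local storage ...
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);
*/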
196 
197 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
198 {
199   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
200   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
201   PetscErrorCode ierr;
202   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
203   const PetscInt *irow,*icol;
204   PetscScalar    *av,*bv,*v = lmat->v;
205   Mat            newmat;
206   IS             iscol_local;
207   MPI_Comm       comm_is,comm_mat;
208 
209   PetscFunctionBegin;
210   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
211   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
212   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
213 
214   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
215   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
216   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
217   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
218   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
219   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
220 
221   /* No parallel redistribution is currently supported! We should really check each index set
222      to confirm that it is OK.  ... Currently only submatrices with the same row partitioning as the
223      original matrix are supported! */
224 
225   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
226   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
227 
228   /* Check submatrix call */
229   if (scall == MAT_REUSE_MATRIX) {
230     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
231     /* Really need to test rows and column sizes! */
232     newmat = *B;
233   } else {
234     /* Create and fill new matrix */
235     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
236     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
237     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
238     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
239   }
240 
241   /* Now extract the data pointers and do the copy, column at a time */
242   newmatd = (Mat_MPIDense*)newmat->data;
243   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
244 
245   for (i=0; i<Ncols; i++) {
246     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
247     for (j=0; j<nrows; j++) {
248       *bv++ = av[irow[j] - rstart];
249     }
250   }
251 
252   /* Assemble the matrices so that the correct flags are set */
253   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
254   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
255 
256   /* Free work space */
257   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
258   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
259   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
260   *B   = newmat;
261   PetscFunctionReturn(0);
262 }
263 
264 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
275 {
276   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
287   MPI_Comm       comm;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
294   /* make sure all processes are using the same insert mode (INSERT_VALUES or ADD_VALUES) */
295   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
296   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
297   mat->insertmode = addv; /* in case this processor had no cache */
298 
299   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
300   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
301   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
305 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
306 {
307   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
308   PetscErrorCode ierr;
309   PetscInt       i,*row,*col,flg,j,rstart,ncols;
310   PetscMPIInt    n;
311   PetscScalar    *val;
312 
313   PetscFunctionBegin;
314   /*  wait on receives */
315   while (1) {
316     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
317     if (!flg) break;
318 
319     for (i=0; i<n;) {
320       /* Now identify the consecutive vals belonging to the same row */
321       for (j=i,rstart=row[j]; j<n; j++) {
322         if (row[j] != rstart) break;
323       }
324       if (j < n) ncols = j-i;
325       else       ncols = n-i;
326       /* Now assemble all these values with a single function call */
327       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
328       i    = j;
329     }
330   }
331   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
332 
333   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
334   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
335 
336   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
337     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
338   }
339   PetscFunctionReturn(0);
340 }
341 
342 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
343 {
344   PetscErrorCode ierr;
345   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
346 
347   PetscFunctionBegin;
348   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 /* The code does not set the diagonal entries correctly unless the
353    matrix is square and the column and row ownerships are identical.
354    This is a BUG. The only way to fix it seems to be to access
355    the local matrix mdn->A directly rather than going through the
356    MatZeroRows() routine.
357 */
358 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
359 {
360   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
361   PetscErrorCode    ierr;
362   PetscInt          i,*owners = A->rmap->range;
363   PetscInt          *sizes,j,idx,nsends;
364   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
365   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
366   PetscInt          *lens,*lrows,*values;
367   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
368   MPI_Comm          comm;
369   MPI_Request       *send_waits,*recv_waits;
370   MPI_Status        recv_status,*send_status;
371   PetscBool         found;
372   const PetscScalar *xx;
373   PetscScalar       *bb;
374 
375   PetscFunctionBegin;
376   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
377   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
378   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
379   /*  first count number of contributors to each processor */
380   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
381   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
382   for (i=0; i<N; i++) {
383     idx   = rows[i];
384     found = PETSC_FALSE;
385     for (j=0; j<size; j++) {
386       if (idx >= owners[j] && idx < owners[j+1]) {
387         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
388       }
389     }
390     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
391   }
392   nsends = 0;
393   for (i=0; i<size; i++) nsends += sizes[2*i+1];
394 
395   /* inform other processors of number of messages and max length*/
396   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
397 
398   /* post receives:   */
399   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
400   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
401   for (i=0; i<nrecvs; i++) {
402     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
403   }
404 
405   /* do sends:
406       1) starts[i] gives the starting index in svalues for the values going to
407          the ith process
408   */
409   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
410   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
411   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
412 
413   starts[0] = 0;
414   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
415   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
416 
417   starts[0] = 0;
418   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
419   count = 0;
420   for (i=0; i<size; i++) {
421     if (sizes[2*i+1]) {
422       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
423     }
424   }
425   ierr = PetscFree(starts);CHKERRQ(ierr);
426 
427   base = owners[rank];
428 
429   /*  wait on receives */
430   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
431   count = nrecvs;
432   slen  = 0;
433   while (count) {
434     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
435     /* unpack receives into our local space */
436     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
437 
438     source[imdex] = recv_status.MPI_SOURCE;
439     lens[imdex]   = n;
440     slen += n;
441     count--;
442   }
443   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
444 
445   /* move the data into the send scatter */
446   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
447   count = 0;
448   for (i=0; i<nrecvs; i++) {
449     values = rvalues + i*nmax;
450     for (j=0; j<lens[i]; j++) {
451       lrows[count++] = values[j] - base;
452     }
453   }
454   ierr = PetscFree(rvalues);CHKERRQ(ierr);
455   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
456   ierr = PetscFree(owner);CHKERRQ(ierr);
457   ierr = PetscFree(sizes);CHKERRQ(ierr);
458 
459   /* fix right hand side if needed */
460   if (x && b) {
461     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
462     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
463     for (i=0; i<slen; i++) {
464       bb[lrows[i]] = diag*xx[lrows[i]];
465     }
466     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
467     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
468   }
469 
470   /* actually zap the local rows */
471   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
472   if (diag != 0.0) {
473     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
474     PetscInt     m   = ll->lda, i;
475 
476     for (i=0; i<slen; i++) {
477       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
478     }
479   }
480   ierr = PetscFree(lrows);CHKERRQ(ierr);
481 
482   /* wait on sends */
483   if (nsends) {
484     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
485     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
486     ierr = PetscFree(send_status);CHKERRQ(ierr);
487   }
488   ierr = PetscFree(send_waits);CHKERRQ(ierr);
489   ierr = PetscFree(svalues);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
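/*
   A minimal usage sketch for the routine above (the global row numbers and the diagonal value
   are placeholders; the optional solution and right-hand-side vectors may be NULL when no
   fix-up of b is wanted):

     PetscInt rows[2] = {0,5};

     ierr = MatZeroRows(A,2,rows,1.0,NULL,NULL);CHKERRQ(ierr);
*/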
492 
493 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
494 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
495 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
496 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
497 
498 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
501   PetscErrorCode ierr;
502 
503   PetscFunctionBegin;
504   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
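/*
   The parallel product first scatters the needed entries of the input vector into the local
   work vector lvec and then applies the local sequential block. A minimal driver sketch (the
   global size 100 is a placeholder):

     Mat A;
     Vec x,y;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);CHKERRQ(ierr);
     ...  fill A with MatSetValues() and assemble ...
     ierr = MatCreateVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/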
509 
510 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
511 {
512   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
513   PetscErrorCode ierr;
514 
515   PetscFunctionBegin;
516   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
517   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
518   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
523 {
524   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
525   PetscErrorCode ierr;
526   PetscScalar    zero = 0.0;
527 
528   PetscFunctionBegin;
529   ierr = VecSet(yy,zero);CHKERRQ(ierr);
530   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
531   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
532   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
537 {
538   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
539   PetscErrorCode ierr;
540 
541   PetscFunctionBegin;
542   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
543   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
544   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
545   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
546   PetscFunctionReturn(0);
547 }
548 
549 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
550 {
551   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
552   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
553   PetscErrorCode ierr;
554   PetscInt       len,i,n,m = A->rmap->n,radd;
555   PetscScalar    *x,zero = 0.0;
556 
557   PetscFunctionBegin;
558   ierr = VecSet(v,zero);CHKERRQ(ierr);
559   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
560   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
561   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
562   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
563   radd = A->rmap->rstart*m;
564   for (i=0; i<len; i++) {
565     x[i] = aloc->v[radd + i*m + i];
566   }
567   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 PetscErrorCode MatDestroy_MPIDense(Mat mat)
572 {
573   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577 #if defined(PETSC_USE_LOG)
578   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
579 #endif
580   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
581   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
582   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
583   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
584 
585   ierr = PetscFree(mat->data);CHKERRQ(ierr);
586   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
587 
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
592   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
593   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
594   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
595 #if defined(PETSC_HAVE_ELEMENTAL)
596   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
597 #endif
598   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
599   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
600   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
601   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
602   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
603   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
604   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
605   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
606   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
607   PetscFunctionReturn(0);
608 }
609 
610 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
611 {
612   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
613   PetscErrorCode    ierr;
614   PetscViewerFormat format;
615   int               fd;
616   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
617   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
618   PetscScalar       *work,*v,*vv;
619   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
620 
621   PetscFunctionBegin;
622   if (mdn->size == 1) {
623     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
624   } else {
625     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
626     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
627     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
628 
629     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
630     if (format == PETSC_VIEWER_NATIVE) {
631 
632       if (!rank) {
633         /* store the matrix as a dense matrix */
634         header[0] = MAT_FILE_CLASSID;
635         header[1] = mat->rmap->N;
636         header[2] = N;
637         header[3] = MATRIX_BINARY_FORMAT_DENSE;
638         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
639 
640         /* get largest work array needed for transposing array */
641         mmax = mat->rmap->n;
642         for (i=1; i<size; i++) {
643           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
644         }
645         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
646 
647         /* write out local array, by rows */
648         m = mat->rmap->n;
649         v = a->v;
650         for (j=0; j<N; j++) {
651           for (i=0; i<m; i++) {
652             work[j + i*N] = *v++;
653           }
654         }
655         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
656         /* get largest work array to receive messages from other processes, excludes process zero */
657         mmax = 0;
658         for (i=1; i<size; i++) {
659           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
660         }
661         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
662         for (k = 1; k < size; k++) {
663           v    = vv;
664           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
665           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
666 
667           for (j = 0; j < N; j++) {
668             for (i = 0; i < m; i++) {
669               work[j + i*N] = *v++;
670             }
671           }
672           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
673         }
674         ierr = PetscFree(work);CHKERRQ(ierr);
675         ierr = PetscFree(vv);CHKERRQ(ierr);
676       } else {
677         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
678       }
679     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
680   }
681   PetscFunctionReturn(0);
682 }
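/*
   As the error message above indicates, a parallel dense matrix can only be written to a
   binary viewer in the NATIVE format. A minimal sketch ("matrix.dat" is a placeholder file
   name and mat an assembled MATMPIDENSE matrix):

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/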
683 
684 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
685 #include <petscdraw.h>
686 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
687 {
688   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
689   PetscErrorCode    ierr;
690   PetscMPIInt       rank = mdn->rank;
691   PetscViewerType   vtype;
692   PetscBool         iascii,isdraw;
693   PetscViewer       sviewer;
694   PetscViewerFormat format;
695 
696   PetscFunctionBegin;
697   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
698   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
699   if (iascii) {
700     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
701     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
702     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
703       MatInfo info;
704       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
705       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
706       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
707       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
708       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
709       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
710       PetscFunctionReturn(0);
711     } else if (format == PETSC_VIEWER_ASCII_INFO) {
712       PetscFunctionReturn(0);
713     }
714   } else if (isdraw) {
715     PetscDraw draw;
716     PetscBool isnull;
717 
718     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
719     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
720     if (isnull) PetscFunctionReturn(0);
721   }
722 
723   {
724     /* assemble the entire matrix onto first processor. */
725     Mat         A;
726     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
727     PetscInt    *cols;
728     PetscScalar *vals;
729 
730     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
733     } else {
734       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
735     }
736     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
737     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
738     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
739     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
740 
741     /* Copy the matrix ... This isn't the most efficient means,
742        but it's quick for now */
743     A->insertmode = INSERT_VALUES;
744 
745     row = mat->rmap->rstart;
746     m   = mdn->A->rmap->n;
747     for (i=0; i<m; i++) {
748       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
749       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
750       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
751       row++;
752     }
753 
754     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
755     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
756     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
757     if (!rank) {
758       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
759       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
760     }
761     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
762     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
763     ierr = MatDestroy(&A);CHKERRQ(ierr);
764   }
765   PetscFunctionReturn(0);
766 }
767 
768 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
769 {
770   PetscErrorCode ierr;
771   PetscBool      iascii,isbinary,isdraw,issocket;
772 
773   PetscFunctionBegin;
774   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
775   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
776   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
777   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
778 
779   if (iascii || issocket || isdraw) {
780     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
781   } else if (isbinary) {
782     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
788 {
789   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
790   Mat            mdn  = mat->A;
791   PetscErrorCode ierr;
792   PetscReal      isend[5],irecv[5];
793 
794   PetscFunctionBegin;
795   info->block_size = 1.0;
796 
797   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
798 
799   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800   isend[3] = info->memory;  isend[4] = info->mallocs;
801   if (flag == MAT_LOCAL) {
802     info->nz_used      = isend[0];
803     info->nz_allocated = isend[1];
804     info->nz_unneeded  = isend[2];
805     info->memory       = isend[3];
806     info->mallocs      = isend[4];
807   } else if (flag == MAT_GLOBAL_MAX) {
808     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
809 
810     info->nz_used      = irecv[0];
811     info->nz_allocated = irecv[1];
812     info->nz_unneeded  = irecv[2];
813     info->memory       = irecv[3];
814     info->mallocs      = irecv[4];
815   } else if (flag == MAT_GLOBAL_SUM) {
816     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
817 
818     info->nz_used      = irecv[0];
819     info->nz_allocated = irecv[1];
820     info->nz_unneeded  = irecv[2];
821     info->memory       = irecv[3];
822     info->mallocs      = irecv[4];
823   }
824   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
825   info->fill_ratio_needed = 0;
826   info->factor_mallocs    = 0;
827   PetscFunctionReturn(0);
828 }
829 
830 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
831 {
832   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
833   PetscErrorCode ierr;
834 
835   PetscFunctionBegin;
836   switch (op) {
837   case MAT_NEW_NONZERO_LOCATIONS:
838   case MAT_NEW_NONZERO_LOCATION_ERR:
839   case MAT_NEW_NONZERO_ALLOCATION_ERR:
840     MatCheckPreallocated(A,1);
841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
842     break;
843   case MAT_ROW_ORIENTED:
844     MatCheckPreallocated(A,1);
845     a->roworiented = flg;
846     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
847     break;
848   case MAT_NEW_DIAGONALS:
849   case MAT_KEEP_NONZERO_PATTERN:
850   case MAT_USE_HASH_TABLE:
851     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
852     break;
853   case MAT_IGNORE_OFF_PROC_ENTRIES:
854     a->donotstash = flg;
855     break;
856   case MAT_SYMMETRIC:
857   case MAT_STRUCTURALLY_SYMMETRIC:
858   case MAT_HERMITIAN:
859   case MAT_SYMMETRY_ETERNAL:
860   case MAT_IGNORE_LOWER_TRIANGULAR:
861   case MAT_IGNORE_ZERO_ENTRIES:
862     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
863     break;
864   default:
865     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
866   }
867   PetscFunctionReturn(0);
868 }
869 
870 
871 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
872 {
873   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
874   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
875   const PetscScalar *l,*r;
876   PetscScalar       x,*v;
877   PetscErrorCode    ierr;
878   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
879 
880   PetscFunctionBegin;
881   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
882   if (ll) {
883     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
884     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
885     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
886     for (i=0; i<m; i++) {
887       x = l[i];
888       v = mat->v + i;
889       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
890     }
891     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
892     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
893   }
894   if (rr) {
895     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
896     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
897     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
898     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
899     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
900     for (i=0; i<n; i++) {
901       x = r[i];
902       v = mat->v + i*m;
903       for (j=0; j<m; j++) (*v++) *= x;
904     }
905     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
906     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
912 {
913   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
914   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
915   PetscErrorCode ierr;
916   PetscInt       i,j;
917   PetscReal      sum = 0.0;
918   PetscScalar    *v  = mat->v;
919 
920   PetscFunctionBegin;
921   if (mdn->size == 1) {
922     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
923   } else {
924     if (type == NORM_FROBENIUS) {
925       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
926         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
927       }
928       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929       *nrm = PetscSqrtReal(*nrm);
930       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
931     } else if (type == NORM_1) {
932       PetscReal *tmp,*tmp2;
933       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
934       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
935       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
936       *nrm = 0.0;
937       v    = mat->v;
938       for (j=0; j<mdn->A->cmap->n; j++) {
939         for (i=0; i<mdn->A->rmap->n; i++) {
940           tmp[j] += PetscAbsScalar(*v);  v++;
941         }
942       }
943       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
944       for (j=0; j<A->cmap->N; j++) {
945         if (tmp2[j] > *nrm) *nrm = tmp2[j];
946       }
947       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
948       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
949     } else if (type == NORM_INFINITY) { /* max row norm */
950       PetscReal ntemp;
951       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
952       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
953     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
954   }
955   PetscFunctionReturn(0);
956 }
957 
958 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
959 {
960   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
961   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
962   Mat            B;
963   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
964   PetscErrorCode ierr;
965   PetscInt       j,i;
966   PetscScalar    *v;
967 
968   PetscFunctionBegin;
969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
970     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
971     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
972     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
973     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
974   } else {
975     B = *matout;
976   }
977 
978   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
979   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
980   for (i=0; i<m; i++) rwork[i] = rstart + i;
981   for (j=0; j<n; j++) {
982     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
983     v   += m;
984   }
985   ierr = PetscFree(rwork);CHKERRQ(ierr);
986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
988   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
989     *matout = B;
990   } else {
991     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
992   }
993   PetscFunctionReturn(0);
994 }
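/*
   A minimal sketch of the supported call patterns for the routine above (A is an assembled
   MATMPIDENSE matrix; At is a placeholder name):

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...  if the values of A change but not its sizes, the existing At can be refilled ...
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   Passing MAT_INPLACE_MATRIX with &A replaces A by its transpose.
*/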
995 
996 
997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
999 
1000 PetscErrorCode MatSetUp_MPIDense(Mat A)
1001 {
1002   PetscErrorCode ierr;
1003 
1004   PetscFunctionBegin;
1005   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1010 {
1011   PetscErrorCode ierr;
1012   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1013 
1014   PetscFunctionBegin;
1015   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1016   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1021 {
1022   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1023   PetscErrorCode ierr;
1024 
1025   PetscFunctionBegin;
1026   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 PetscErrorCode MatRealPart_MPIDense(Mat A)
1031 {
1032   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1033   PetscErrorCode ierr;
1034 
1035   PetscFunctionBegin;
1036   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1041 {
1042   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1051 {
1052   PetscErrorCode    ierr;
1053   PetscScalar       *x;
1054   const PetscScalar *a;
1055   PetscInt          lda;
1056 
1057   PetscFunctionBegin;
1058   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1059   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
1060   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
1061   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1062   ierr = PetscMemcpy(x,a+col*lda,A->rmap->n*sizeof(PetscScalar));CHKERRQ(ierr);
1063   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1064   ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1069 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1070 {
1071   PetscErrorCode ierr;
1072   PetscInt       i,n;
1073   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1074   PetscReal      *work;
1075 
1076   PetscFunctionBegin;
1077   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1078   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1079   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1080   if (type == NORM_2) {
1081     for (i=0; i<n; i++) work[i] *= work[i];
1082   }
1083   if (type == NORM_INFINITY) {
1084     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1085   } else {
1086     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1087   }
1088   ierr = PetscFree(work);CHKERRQ(ierr);
1089   if (type == NORM_2) {
1090     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1091   }
1092   PetscFunctionReturn(0);
1093 }
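/*
   Column norms are computed on the local block and then combined with a single reduction
   (a sum for NORM_1/NORM_2, a max for NORM_INFINITY). A minimal usage sketch (norms is a
   placeholder array holding one entry per global column):

     PetscReal *norms;
     PetscInt  n;

     ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
     ierr = PetscMalloc1(n,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/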
1094 
1095 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1096 {
1097   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1098   PetscErrorCode ierr;
1099   PetscScalar    *a;
1100   PetscInt       m,n,i;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1104   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1105   for (i=0; i<m*n; i++) {
1106     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1107   }
1108   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1113 
1114 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1115 {
1116   PetscFunctionBegin;
1117   *missing = PETSC_FALSE;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1122 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1123 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1124 
1125 /* -------------------------------------------------------------------*/
1126 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1127                                         MatGetRow_MPIDense,
1128                                         MatRestoreRow_MPIDense,
1129                                         MatMult_MPIDense,
1130                                 /*  4*/ MatMultAdd_MPIDense,
1131                                         MatMultTranspose_MPIDense,
1132                                         MatMultTransposeAdd_MPIDense,
1133                                         0,
1134                                         0,
1135                                         0,
1136                                 /* 10*/ 0,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                         MatTranspose_MPIDense,
1141                                 /* 15*/ MatGetInfo_MPIDense,
1142                                         MatEqual_MPIDense,
1143                                         MatGetDiagonal_MPIDense,
1144                                         MatDiagonalScale_MPIDense,
1145                                         MatNorm_MPIDense,
1146                                 /* 20*/ MatAssemblyBegin_MPIDense,
1147                                         MatAssemblyEnd_MPIDense,
1148                                         MatSetOption_MPIDense,
1149                                         MatZeroEntries_MPIDense,
1150                                 /* 24*/ MatZeroRows_MPIDense,
1151                                         0,
1152                                         0,
1153                                         0,
1154                                         0,
1155                                 /* 29*/ MatSetUp_MPIDense,
1156                                         0,
1157                                         0,
1158                                         MatGetDiagonalBlock_MPIDense,
1159                                         0,
1160                                 /* 34*/ MatDuplicate_MPIDense,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                 /* 39*/ MatAXPY_MPIDense,
1166                                         MatCreateSubMatrices_MPIDense,
1167                                         0,
1168                                         MatGetValues_MPIDense,
1169                                         0,
1170                                 /* 44*/ 0,
1171                                         MatScale_MPIDense,
1172                                         MatShift_Basic,
1173                                         0,
1174                                         0,
1175                                 /* 49*/ MatSetRandom_MPIDense,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                 /* 54*/ 0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1186                                         MatDestroy_MPIDense,
1187                                         MatView_MPIDense,
1188                                         0,
1189                                         0,
1190                                 /* 64*/ 0,
1191                                         0,
1192                                         0,
1193                                         0,
1194                                         0,
1195                                 /* 69*/ 0,
1196                                         0,
1197                                         0,
1198                                         0,
1199                                         0,
1200                                 /* 74*/ 0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                 /* 79*/ 0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                 /* 83*/ MatLoad_MPIDense,
1210                                         0,
1211                                         0,
1212                                         0,
1213                                         0,
1214                                         0,
1215 #if defined(PETSC_HAVE_ELEMENTAL)
1216                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1217                                         MatMatMultSymbolic_MPIDense_MPIDense,
1218 #else
1219                                 /* 89*/ 0,
1220                                         0,
1221 #endif
1222                                         MatMatMultNumeric_MPIDense,
1223                                         0,
1224                                         0,
1225                                 /* 94*/ 0,
1226                                         MatMatTransposeMult_MPIDense_MPIDense,
1227                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1228                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1229                                         0,
1230                                 /* 99*/ 0,
1231                                         0,
1232                                         0,
1233                                         MatConjugate_MPIDense,
1234                                         0,
1235                                 /*104*/ 0,
1236                                         MatRealPart_MPIDense,
1237                                         MatImaginaryPart_MPIDense,
1238                                         0,
1239                                         0,
1240                                 /*109*/ 0,
1241                                         0,
1242                                         0,
1243                                         MatGetColumnVector_MPIDense,
1244                                         MatMissingDiagonal_MPIDense,
1245                                 /*114*/ 0,
1246                                         0,
1247                                         0,
1248                                         0,
1249                                         0,
1250                                 /*119*/ 0,
1251                                         0,
1252                                         0,
1253                                         0,
1254                                         0,
1255                                 /*124*/ 0,
1256                                         MatGetColumnNorms_MPIDense,
1257                                         0,
1258                                         0,
1259                                         0,
1260                                 /*129*/ 0,
1261                                         MatTransposeMatMult_MPIDense_MPIDense,
1262                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1263                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1264                                         0,
1265                                 /*134*/ 0,
1266                                         0,
1267                                         0,
1268                                         0,
1269                                         0,
1270                                 /*139*/ 0,
1271                                         0,
1272                                         0,
1273                                         0,
1274                                         0,
1275                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1276 };
1277 
1278 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1279 {
1280   Mat_MPIDense   *a;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1285   mat->preallocated = PETSC_TRUE;
1286   /* Note:  For now, when data is provided, this assumes the user has correctly
1287    allocated the local dense storage space.  We should add error checking. */
1288 
1289   a       = (Mat_MPIDense*)mat->data;
1290   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1291   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1292   a->nvec = mat->cmap->n;
1293 
1294   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1295   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1296   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1297   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1298   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
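/*
   A minimal creation sketch using the preallocation routine above (the global size 100 is a
   placeholder; pass an existing column-major array instead of NULL to supply the local
   storage yourself):

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/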
1301 
1302 #if defined(PETSC_HAVE_ELEMENTAL)
1303 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1304 {
1305   Mat            mat_elemental;
1306   PetscErrorCode ierr;
1307   PetscScalar    *v;
1308   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1309 
1310   PetscFunctionBegin;
1311   if (reuse == MAT_REUSE_MATRIX) {
1312     mat_elemental = *newmat;
1313     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1314   } else {
1315     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1316     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1317     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1318     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1319     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1320   }
1321 
1322   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1323   for (i=0; i<N; i++) cols[i] = i;
1324   for (i=0; i<m; i++) rows[i] = rstart + i;
1325 
1326   /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
1327   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1328   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1329   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1330   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1331   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1332   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1333 
1334   if (reuse == MAT_INPLACE_MATRIX) {
1335     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1336   } else {
1337     *newmat = mat_elemental;
1338   }
1339   PetscFunctionReturn(0);
1340 }
1341 #endif
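/* Illustrative sketch (not part of this file's API, helper name is hypothetical): a caller would
   normally reach the conversion above through the generic MatConvert() interface; this is only
   meaningful when PETSc is configured with Elemental. */
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode ExampleConvertToElemental(Mat A,Mat *E)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,E);CHKERRQ(ierr);  /* dispatches to MatConvert_MPIDense_Elemental() for MATMPIDENSE input */
  PetscFunctionReturn(0);
}
#endif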
1342 
1343 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1344 {
1345   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1354 {
1355   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1364 {
1365   PetscErrorCode ierr;
1366   Mat_MPIDense   *mat;
1367   PetscInt       m,nloc,N;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1371   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1372   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1373     PetscInt sum;
1374 
1375     if (n == PETSC_DECIDE) {
1376       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1377     }
1378     /* Check sum(n) = N */
1379     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1380     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1381 
1382     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1383   }
1384 
1385   /* numeric phase */
1386   mat = (Mat_MPIDense*)(*outmat)->data;
1387   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1388   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1394 {
1395   Mat_MPIDense   *a;
1396   PetscErrorCode ierr;
1397 
1398   PetscFunctionBegin;
1399   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1400   mat->data = (void*)a;
1401   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1402 
1403   mat->insertmode = NOT_SET_VALUES;
1404   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1405   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1406 
1407   /* build cache for off-processor entries */
1408   a->donotstash = PETSC_FALSE;
1409 
1410   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1411 
1412   /* stuff used for matrix vector multiply */
1413   a->lvec        = 0;
1414   a->Mvctx       = 0;
1415   a->roworiented = PETSC_TRUE;
1416 
1417   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1418   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1424 #if defined(PETSC_HAVE_ELEMENTAL)
1425   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1426 #endif
1427   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1429   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1430   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1431 
1432   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1437   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 /*MC
1442    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1443 
1444    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1445    and MATMPIDENSE otherwise.
1446 
1447    Options Database Keys:
1448 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1449 
1450   Level: beginner
1451 
1452 
1453 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1454 M*/
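/* Illustrative sketch (hypothetical helper): creating a matrix whose type is chosen from the
   options database, so that -mat_type dense selects MATSEQDENSE or MATMPIDENSE depending on
   the size of the communicator. */
static PetscErrorCode ExampleDenseFromOptions(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);   /* honours -mat_type dense */
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}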
1455 
1456 /*@C
1457    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1458 
1459    Not Collective
1460 
1461    Input Parameters:
1462 +  B - the matrix
1463 -  data - optional location of matrix data.  Set data=NULL for PETSc
1464    to control all matrix memory allocation.
1465 
1466    Notes:
1467    The dense format is fully compatible with standard Fortran 77
1468    storage by columns.
1469 
1470    The data input variable is intended primarily for Fortran programmers
1471    who wish to allocate their own matrix memory space.  Most users should
1472    set data=NULL.
1473 
1474    Level: intermediate
1475 
1476 .keywords: matrix,dense, parallel
1477 
1478 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1479 @*/
1480 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1481 {
1482   PetscErrorCode ierr;
1483 
1484   PetscFunctionBegin;
1485   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
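/* Illustrative sketch (hypothetical helper and sizes): the usual calling sequence for the routine
   above, letting PETSc allocate the local dense storage by passing data = NULL. */
static PetscErrorCode ExampleMPIDensePreallocation(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,NULL);CHKERRQ(ierr);   /* PETSc manages the storage */
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}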
1488 
1489 /*@
1490    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1491    array provided by the user. This is useful for avoiding a copy of an array
1492    into a matrix.
1493 
1494    Not Collective
1495 
1496    Input Parameters:
1497 +  mat - the matrix
1498 -  array - the array in column major order
1499 
1500    Notes:
1501    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1502    freed when the matrix is destroyed.
1503 
1504    Level: developer
1505 
1506 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1507 
1508 @*/
1509 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1510 {
1511   PetscErrorCode ierr;
1512   PetscFunctionBegin;
1513   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1514   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 /*@
1519    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
1520 
1521    Not Collective
1522 
1523    Input Parameters:
1524 .  mat - the matrix
1525 
1526    Notes:
1527    You can only call this after a call to MatDensePlaceArray()
1528 
1529    Level: developer
1530 
1531 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1532 
1533 @*/
1534 PetscErrorCode  MatDenseResetArray(Mat mat)
1535 {
1536   PetscErrorCode ierr;
1537   PetscFunctionBegin;
1538   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1539   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
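/* Illustrative sketch (hypothetical helper): the place/reset pairing described above. The
   user-owned, column-major buffer is wrapped without a copy and must outlive its use;
   PETSc never frees it. */
static PetscErrorCode ExamplePlaceAndResetArray(Mat A,PetscScalar *userarray)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(A,userarray);CHKERRQ(ierr);   /* A now operates on userarray */
  /* ... use A here, e.g. with MatMult() ... */
  ierr = MatDenseResetArray(A);CHKERRQ(ierr);             /* A points back to its original storage */
  PetscFunctionReturn(0);
}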
1542 
1543 /*@C
1544    MatCreateDense - Creates a parallel matrix in dense format.
1545 
1546    Collective on MPI_Comm
1547 
1548    Input Parameters:
1549 +  comm - MPI communicator
1550 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1551 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1552 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1553 -  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1554 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1555    to control all matrix memory allocation.
1556 
1557    Output Parameter:
1558 .  A - the matrix
1559 
1560    Notes:
1561    The dense format is fully compatible with standard Fortran 77
1562    storage by columns.
1563 
1564    The data input variable is intended primarily for Fortran programmers
1565    who wish to allocate their own matrix memory space.  Most users should
1566    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1567 
1568    The user MUST specify either the local or global matrix dimensions
1569    (possibly both).
1570 
1571    Level: intermediate
1572 
1573 .keywords: matrix,dense, parallel
1574 
1575 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1576 @*/
1577 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1578 {
1579   PetscErrorCode ierr;
1580   PetscMPIInt    size;
1581 
1582   PetscFunctionBegin;
1583   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1584   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1585   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1586   if (size > 1) {
1587     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1588     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1589     if (data) {  /* user provided data array, so no need to assemble */
1590       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1591       (*A)->assembled = PETSC_TRUE;
1592     }
1593   } else {
1594     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1595     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1596   }
1597   PetscFunctionReturn(0);
1598 }
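/* Illustrative sketch (hypothetical helper): creating a distributed dense matrix with
   PETSc-managed storage and filling the locally owned rows. */
static PetscErrorCode ExampleCreateDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend;

  PetscFunctionBegin;
  ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValue(*A,i,i % N,1.0,INSERT_VALUES);CHKERRQ(ierr);   /* one entry per locally owned row */
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}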
1599 
1600 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1601 {
1602   Mat            mat;
1603   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1604   PetscErrorCode ierr;
1605 
1606   PetscFunctionBegin;
1607   *newmat = 0;
1608   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1609   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1610   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1611   a       = (Mat_MPIDense*)mat->data;
1612 
1613   mat->factortype   = A->factortype;
1614   mat->assembled    = PETSC_TRUE;
1615   mat->preallocated = PETSC_TRUE;
1616 
1617   a->size         = oldmat->size;
1618   a->rank         = oldmat->rank;
1619   mat->insertmode = NOT_SET_VALUES;
1620   a->nvec         = oldmat->nvec;
1621   a->donotstash   = oldmat->donotstash;
1622 
1623   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1624   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1625 
1626   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1627   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1628   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1629 
1630   *newmat = mat;
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1635 {
1636   PetscErrorCode ierr;
1637   PetscMPIInt    rank,size;
1638   const PetscInt *rowners;
1639   PetscInt       i,m,n,nz,j,mMax;
1640   PetscScalar    *array,*vals,*vals_ptr;
1641   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1642 
1643   PetscFunctionBegin;
1644   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1645   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1646 
1647   /* determine ownership of rows and columns */
1648   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1649   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1650 
1651   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1652   if (!a->A) {
1653     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1654   }
1655   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1656   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1657   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1658   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1659   if (!rank) {
1660     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1661 
1662     /* read in my part of the matrix numerical values  */
1663     ierr = PetscBinaryRead(fd,vals,m*N,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1664 
1665     /* insert into matrix by row (this is why we cannot read directly into the array) */
1666     vals_ptr = vals;
1667     for (i=0; i<m; i++) {
1668       for (j=0; j<N; j++) {
1669         array[i + j*m] = *vals_ptr++;
1670       }
1671     }
1672 
1673     /* read in other processors and ship out */
1674     for (i=1; i<size; i++) {
1675       nz   = (rowners[i+1] - rowners[i])*N;
1676       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1677       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1678     }
1679   } else {
1680     /* receive numeric values */
1681     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1682 
1683     /* receive message of values*/
1684     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1685 
1686     /* insert into matrix by row (this is why we cannot read directly into the array) */
1687     vals_ptr = vals;
1688     for (i=0; i<m; i++) {
1689       for (j=0; j<N; j++) {
1690         array[i + j*m] = *vals_ptr++;
1691       }
1692     }
1693   }
1694   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1695   ierr = PetscFree(vals);CHKERRQ(ierr);
1696   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1702 {
1703   Mat_MPIDense   *a;
1704   PetscScalar    *vals,*svals;
1705   MPI_Comm       comm;
1706   MPI_Status     status;
1707   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1708   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1709   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1710   PetscInt       i,nz,j,rstart,rend;
1711   int            fd;
1712   PetscBool      isbinary;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1717   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1718 
1719   /* force binary viewer to load .info file if it has not yet done so */
1720   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1721   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1722   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1723   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1724   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1725   if (!rank) {
1726     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
1727     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1728   }
1729   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1730   M    = header[1]; N = header[2]; nz = header[3];
1731 
1732   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1733   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1734   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1735 
1736   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1737   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1738 
1739   /*
1740        Handle case where matrix is stored on disk as a dense matrix
1741   */
1742   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1743     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1744     PetscFunctionReturn(0);
1745   }
1746 
1747   /* determine ownership of all rows */
1748   if (newmat->rmap->n < 0) {
1749     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1750   } else {
1751     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1752   }
1753   if (newmat->cmap->n < 0) {
1754     n = PETSC_DECIDE;
1755   } else {
1756     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1757   }
1758 
1759   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1760   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1761   rowners[0] = 0;
1762   for (i=2; i<=size; i++) {
1763     rowners[i] += rowners[i-1];
1764   }
1765   rstart = rowners[rank];
1766   rend   = rowners[rank+1];
1767 
1768   /* distribute row lengths to all processors */
1769   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1770   if (!rank) {
1771     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1772     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
1773     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1774     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1775     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1776     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1777   } else {
1778     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1779   }
1780 
1781   if (!rank) {
1782     /* calculate the number of nonzeros on each processor */
1783     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1784     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1785     for (i=0; i<size; i++) {
1786       for (j=rowners[i]; j< rowners[i+1]; j++) {
1787         procsnz[i] += rowlengths[j];
1788       }
1789     }
1790     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1791 
1792     /* determine max buffer needed and allocate it */
1793     maxnz = 0;
1794     for (i=0; i<size; i++) {
1795       maxnz = PetscMax(maxnz,procsnz[i]);
1796     }
1797     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1798 
1799     /* read in my part of the matrix column indices  */
1800     nz   = procsnz[0];
1801     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1802     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1803 
1804     /* read in everyone else's part and ship it off */
1805     for (i=1; i<size; i++) {
1806       nz   = procsnz[i];
1807       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
1808       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1809     }
1810     ierr = PetscFree(cols);CHKERRQ(ierr);
1811   } else {
1812     /* determine buffer space needed for message */
1813     nz = 0;
1814     for (i=0; i<m; i++) {
1815       nz += ourlens[i];
1816     }
1817     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1818 
1819     /* receive message of column indices*/
1820     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1821     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1822     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1823   }
1824 
1825   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1826   a = (Mat_MPIDense*)newmat->data;
1827   if (!a->A) {
1828     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1829   }
1830 
1831   if (!rank) {
1832     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1833 
1834     /* read in my part of the matrix numerical values  */
1835     nz   = procsnz[0];
1836     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1837 
1838     /* insert into matrix */
1839     jj      = rstart;
1840     smycols = mycols;
1841     svals   = vals;
1842     for (i=0; i<m; i++) {
1843       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1844       smycols += ourlens[i];
1845       svals   += ourlens[i];
1846       jj++;
1847     }
1848 
1849     /* read in other processors and ship out */
1850     for (i=1; i<size; i++) {
1851       nz   = procsnz[i];
1852       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
1853       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1854     }
1855     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1856   } else {
1857     /* receive numeric values */
1858     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1859 
1860     /* receive message of values*/
1861     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1862     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1863     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1864 
1865     /* insert into matrix */
1866     jj      = rstart;
1867     smycols = mycols;
1868     svals   = vals;
1869     for (i=0; i<m; i++) {
1870       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1871       smycols += ourlens[i];
1872       svals   += ourlens[i];
1873       jj++;
1874     }
1875   }
1876   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1877   ierr = PetscFree(vals);CHKERRQ(ierr);
1878   ierr = PetscFree(mycols);CHKERRQ(ierr);
1879   ierr = PetscFree(rowners);CHKERRQ(ierr);
1880 
1881   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1882   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1883   PetscFunctionReturn(0);
1884 }
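/* Illustrative sketch (hypothetical helper and file name): loading a matrix stored in PETSc
   binary format into a MATDENSE matrix through the generic MatLoad() interface, which
   dispatches to the routine above on more than one process. */
static PetscErrorCode ExampleLoadDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}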
1885 
1886 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1887 {
1888   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1889   Mat            a,b;
1890   PetscBool      flg;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   a    = matA->A;
1895   b    = matB->A;
1896   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1897   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1902 {
1903   PetscErrorCode        ierr;
1904   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1905   Mat_TransMatMultDense *atb = a->atbdense;
1906 
1907   PetscFunctionBegin;
1908   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1909   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1910   ierr = PetscFree(atb);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1915 {
1916   PetscErrorCode        ierr;
1917   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1918   Mat_MatTransMultDense *abt = a->abtdense;
1919 
1920   PetscFunctionBegin;
1921   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1922   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1923   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1924   ierr = PetscFree(abt);CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1929 {
1930   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1931   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1932   Mat_TransMatMultDense *atb = c->atbdense;
1933   PetscErrorCode ierr;
1934   MPI_Comm       comm;
1935   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1936   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1937   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1938   PetscScalar    _DOne=1.0,_DZero=0.0;
1939   PetscBLASInt   am,an,bn,aN;
1940   const PetscInt *ranges;
1941 
1942   PetscFunctionBegin;
1943   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1944   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1945   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1946 
1947   /* compute atbarray = aseq^T * bseq */
1948   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1949   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1950   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1951   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1952   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1953 
1954   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1955   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1956 
1957   /* arrange atbarray into sendbuf */
1958   k = 0;
1959   for (proc=0; proc<size; proc++) {
1960     for (j=0; j<cN; j++) {
1961       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1962     }
1963   }
1964   /* sum all atbarray to local values of C */
1965   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1966   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1967   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1972 {
1973   PetscErrorCode        ierr;
1974   Mat                   Cdense;
1975   MPI_Comm              comm;
1976   PetscMPIInt           size;
1977   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1978   Mat_MPIDense          *c;
1979   Mat_TransMatMultDense *atb;
1980 
1981   PetscFunctionBegin;
1982   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1983   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1984     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1985   }
1986 
1987   /* create matrix product Cdense */
1988   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1989   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1990   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1991   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1992   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1993   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   *C   = Cdense;
1995 
1996   /* create data structure for reusing Cdense */
1997   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1998   ierr = PetscNew(&atb);CHKERRQ(ierr);
1999   cM = Cdense->rmap->N;
2000   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
2001 
2002   c                    = (Mat_MPIDense*)Cdense->data;
2003   c->atbdense          = atb;
2004   atb->destroy         = Cdense->ops->destroy;
2005   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2010 {
2011   PetscErrorCode ierr;
2012 
2013   PetscFunctionBegin;
2014   if (scall == MAT_INITIAL_MATRIX) {
2015     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2016   }
2017   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
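/* Illustrative sketch (hypothetical helper): computing C = A^T * B for two MATMPIDENSE matrices
   through the public MatTransposeMatMult() interface; with MAT_REUSE_MATRIX the data attached
   to C by the symbolic routine above is reused. */
static PetscErrorCode ExampleTransposeMatMult(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);   /* e.g. after the values of A or B changed */
  PetscFunctionReturn(0);
}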
2020 
2021 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
2022 {
2023   PetscErrorCode        ierr;
2024   Mat                   Cdense;
2025   MPI_Comm              comm;
2026   PetscMPIInt           i, size;
2027   PetscInt              maxRows, bufsiz;
2028   Mat_MPIDense          *c;
2029   PetscMPIInt           tag;
2030   const char            *algTypes[2] = {"allgatherv","cyclic"};
2031   PetscInt              alg, nalg = 2;
2032   Mat_MatTransMultDense *abt;
2033 
2034   PetscFunctionBegin;
2035   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2036   alg = 0; /* default is allgatherv */
2037   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2038   ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
2039   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2040   if (A->cmap->N != B->cmap->N) {
2041     SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
2042   }
2043 
2044   /* create matrix product Cdense */
2045   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
2046   ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
2047   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
2048   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
2049   ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
2050   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2051   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2052   *C   = Cdense;
2053 
2054   /* create data structure for reusing Cdense */
2055   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2056   ierr = PetscNew(&abt);CHKERRQ(ierr);
2057   abt->tag = tag;
2058   abt->alg = alg;
2059   switch (alg) {
2060   case 1:
2061     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2062     bufsiz = A->cmap->N * maxRows;
2063     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
2064     break;
2065   default:
2066     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
2067     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
2068     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2069     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2070     break;
2071   }
2072 
2073   c                    = (Mat_MPIDense*)Cdense->data;
2074   c->abtdense          = abt;
2075   abt->destroy         = Cdense->ops->destroy;
2076   Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2081 {
2082   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2083   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2084   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2085   Mat_MatTransMultDense *abt = c->abtdense;
2086   PetscErrorCode ierr;
2087   MPI_Comm       comm;
2088   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2089   PetscScalar    *sendbuf, *recvbuf=0, *carray;
2090   PetscInt       i,cK=A->cmap->N,k,j,bn;
2091   PetscScalar    _DOne=1.0,_DZero=0.0;
2092   PetscBLASInt   cm, cn, ck;
2093   MPI_Request    reqs[2];
2094   const PetscInt *ranges;
2095 
2096   PetscFunctionBegin;
2097   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2098   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2099   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2100 
2101   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
2102   bn = B->rmap->n;
2103   if (bseq->lda == bn) {
2104     sendbuf = bseq->v;
2105   } else {
2106     sendbuf = abt->buf[0];
2107     for (k = 0, i = 0; i < cK; i++) {
2108       for (j = 0; j < bn; j++, k++) {
2109         sendbuf[k] = bseq->v[i * bseq->lda + j];
2110       }
2111     }
2112   }
2113   if (size > 1) {
2114     sendto = (rank + size - 1) % size;
2115     recvfrom = (rank + size + 1) % size;
2116   } else {
2117     sendto = recvfrom = 0;
2118   }
2119   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2120   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2121   recvisfrom = rank;
2122   for (i = 0; i < size; i++) {
2123     /* we have finished receiving and sending, so the bufs can be read/modified */
2124     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2125     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2126 
2127     if (nextrecvisfrom != rank) {
2128       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2129       sendsiz = cK * bn;
2130       recvsiz = cK * nextbn;
2131       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2132       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
2133       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
2134     }
2135 
2136     /* local aseq * sendbuf^T */
2137     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
2138     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
2139     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
2140 
2141     if (nextrecvisfrom != rank) {
2142       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2143       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2144     }
2145     bn = nextbn;
2146     recvisfrom = nextrecvisfrom;
2147     sendbuf = recvbuf;
2148   }
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2153 {
2154   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2155   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
2156   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
2157   Mat_MatTransMultDense *abt = c->abtdense;
2158   PetscErrorCode ierr;
2159   MPI_Comm       comm;
2160   PetscMPIInt    rank,size;
2161   PetscScalar    *sendbuf, *recvbuf;
2162   PetscInt       i,cK=A->cmap->N,k,j,bn;
2163   PetscScalar    _DOne=1.0,_DZero=0.0;
2164   PetscBLASInt   cm, cn, ck;
2165 
2166   PetscFunctionBegin;
2167   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2168   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2170 
2171   /* copy transpose of B into buf[0] */
2172   bn      = B->rmap->n;
2173   sendbuf = abt->buf[0];
2174   recvbuf = abt->buf[1];
2175   for (k = 0, j = 0; j < bn; j++) {
2176     for (i = 0; i < cK; i++, k++) {
2177       sendbuf[k] = bseq->v[i * bseq->lda + j];
2178     }
2179   }
2180   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2181   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2182   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2183   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2184   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2189 {
2190   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
2191   Mat_MatTransMultDense *abt = c->abtdense;
2192   PetscErrorCode ierr;
2193 
2194   PetscFunctionBegin;
2195   switch (abt->alg) {
2196   case 1:
2197     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2198     break;
2199   default:
2200     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2201     break;
2202   }
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
2207 {
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   if (scall == MAT_INITIAL_MATRIX) {
2212     ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2213   }
2214   ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
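/* Illustrative sketch (hypothetical helper): computing C = A * B^T for two MATMPIDENSE matrices;
   the communication pattern (allgatherv or cyclic) is chosen in the symbolic routine above via
   the option -matmattransmult_mpidense_mpidense_via. */
static PetscErrorCode ExampleMatTransposeMult(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}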
2217 
2218 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2219 {
2220   PetscErrorCode   ierr;
2221   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2222   Mat_MatMultDense *ab = a->abdense;
2223 
2224   PetscFunctionBegin;
2225   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2226   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2227   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2228 
2229   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2230   ierr = PetscFree(ab);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 #if defined(PETSC_HAVE_ELEMENTAL)
2235 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2236 {
2237   PetscErrorCode   ierr;
2238   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2239   Mat_MatMultDense *ab=c->abdense;
2240 
2241   PetscFunctionBegin;
2242   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2243   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2244   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2245   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2250 {
2251   PetscErrorCode   ierr;
2252   Mat              Ae,Be,Ce;
2253   Mat_MPIDense     *c;
2254   Mat_MatMultDense *ab;
2255 
2256   PetscFunctionBegin;
2257   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2258     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2259   }
2260 
2261   /* convert A and B to Elemental matrices Ae and Be */
2262   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2263   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2264 
2265   /* Ce = Ae*Be */
2266   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2267   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2268 
2269   /* convert Ce to C */
2270   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2271 
2272   /* create data structure for reusing the product C */
2273   ierr = PetscNew(&ab);CHKERRQ(ierr);
2274   c                  = (Mat_MPIDense*)(*C)->data;
2275   c->abdense         = ab;
2276 
2277   ab->Ae             = Ae;
2278   ab->Be             = Be;
2279   ab->Ce             = Ce;
2280   ab->destroy        = (*C)->ops->destroy;
2281   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2282   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2287 {
2288   PetscErrorCode ierr;
2289 
2290   PetscFunctionBegin;
2291   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
2292     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2293     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2294     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2295   } else {
2296     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2297     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2298     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2299   }
2300   PetscFunctionReturn(0);
2301 }
2302 #endif
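/* Illustrative sketch (hypothetical helper): the generic entry point callers use; for two
   MATMPIDENSE operands the product is formed through the Elemental-based routines above
   when PETSc is configured with Elemental. */
static PetscErrorCode ExampleMatMatMult(Mat A,Mat B,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}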
2303 
2304