xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4bbf5ea87ee73436f0fee53b672039f91ba53c48)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  /* the diagonal block is cached on the matrix; it is built only on first request */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B shares storage with the local matrix: it points directly at the columns
       starting at global column rstart.
       NOTE(review): the offset m*rstart assumes the local array's leading
       dimension equals m — confirm against MatDenseSetLDA usage */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing bumps B's reference count, so dropping our reference here keeps
       the cached object (and hence *a) alive until A is destroyed */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
90 
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: insert straight into the inner sequential matrix */
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: v is laid out column-major (m rows per column),
           so entries for this row are strided and inserted one at a time */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: park the values in the stash; they are shipped to the
         owning rank during MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  /* every process needs the full (global) list of selected columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  /* Both arrays are column-major. For each selected global column, copy the
     selected locally owned rows; bv advances contiguously because the target
     columns are filled in order.
     NOTE(review): this assumes the target's leading dimension equals nrows and
     that isrow contains only locally owned indices — confirm with callers */
  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
248 
249 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
250 {
251   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
252   PetscErrorCode ierr;
253 
254   PetscFunctionBegin;
255   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
256   PetscFunctionReturn(0);
257 }
258 
259 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
260 {
261   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
262   PetscErrorCode ierr;
263 
264   PetscFunctionBegin;
265   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* a bitwise OR across ranks detects a mix: ADD_VALUES|INSERT_VALUES is a
     value no single mode can produce */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start shipping stashed off-process values; the communication is completed
     and the values applied in MatAssemblyEnd_MPIDense() */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
289 
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  /* drain the stash: each incoming message carries (row,col,val) triples that
     other ranks set into rows owned by this process */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  /* assemble the inner sequential matrix */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* on the first final assembly, build the scatter used by MatMult() */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
326 
327 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
328 {
329   PetscErrorCode ierr;
330   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
331 
332   PetscFunctionBegin;
333   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
343 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
344 {
345   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
346   PetscErrorCode    ierr;
347   PetscInt          i,*owners = A->rmap->range;
348   PetscInt          *sizes,j,idx,nsends;
349   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
350   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
351   PetscInt          *lens,*lrows,*values;
352   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
353   MPI_Comm          comm;
354   MPI_Request       *send_waits,*recv_waits;
355   MPI_Status        recv_status,*send_status;
356   PetscBool         found;
357   const PetscScalar *xx;
358   PetscScalar       *bb;
359 
360   PetscFunctionBegin;
361   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
362   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
363   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
364   /*  first count number of contributors to each processor */
365   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
366   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
367   for (i=0; i<N; i++) {
368     idx   = rows[i];
369     found = PETSC_FALSE;
370     for (j=0; j<size; j++) {
371       if (idx >= owners[j] && idx < owners[j+1]) {
372         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
373       }
374     }
375     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
376   }
377   nsends = 0;
378   for (i=0; i<size; i++) nsends += sizes[2*i+1];
379 
380   /* inform other processors of number of messages and max length*/
381   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
382 
383   /* post receives:   */
384   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
385   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
386   for (i=0; i<nrecvs; i++) {
387     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
388   }
389 
390   /* do sends:
391       1) starts[i] gives the starting index in svalues for stuff going to
392          the ith processor
393   */
394   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
395   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
396   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
397 
398   starts[0] = 0;
399   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
400   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
401 
402   starts[0] = 0;
403   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
404   count = 0;
405   for (i=0; i<size; i++) {
406     if (sizes[2*i+1]) {
407       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
408     }
409   }
410   ierr = PetscFree(starts);CHKERRQ(ierr);
411 
412   base = owners[rank];
413 
414   /*  wait on receives */
415   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
416   count = nrecvs;
417   slen  = 0;
418   while (count) {
419     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
420     /* unpack receives into our local space */
421     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
422 
423     source[imdex] = recv_status.MPI_SOURCE;
424     lens[imdex]   = n;
425     slen += n;
426     count--;
427   }
428   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
429 
430   /* move the data into the send scatter */
431   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
432   count = 0;
433   for (i=0; i<nrecvs; i++) {
434     values = rvalues + i*nmax;
435     for (j=0; j<lens[i]; j++) {
436       lrows[count++] = values[j] - base;
437     }
438   }
439   ierr = PetscFree(rvalues);CHKERRQ(ierr);
440   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
441   ierr = PetscFree(owner);CHKERRQ(ierr);
442   ierr = PetscFree(sizes);CHKERRQ(ierr);
443 
444   /* fix right hand side if needed */
445   if (x && b) {
446     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
447     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
448     for (i=0; i<slen; i++) {
449       bb[lrows[i]] = diag*xx[lrows[i]];
450     }
451     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
452     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
453   }
454 
455   /* actually zap the local rows */
456   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
457   if (diag != 0.0) {
458     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
459     PetscInt     m   = ll->lda, i;
460 
461     for (i=0; i<slen; i++) {
462       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
463     }
464   }
465   ierr = PetscFree(lrows);CHKERRQ(ierr);
466 
467   /* wait on sends */
468   if (nsends) {
469     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
470     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
471     ierr = PetscFree(send_status);CHKERRQ(ierr);
472   }
473   ierr = PetscFree(send_waits);CHKERRQ(ierr);
474   ierr = PetscFree(svalues);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
479 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
480 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
481 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
482 
483 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
484 {
485   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
486   PetscErrorCode ierr;
487 
488   PetscFunctionBegin;
489   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
490   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
496 {
497   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
498   PetscErrorCode ierr;
499 
500   PetscFunctionBegin;
501   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
502   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
503   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
504   PetscFunctionReturn(0);
505 }
506 
507 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
508 {
509   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
510   PetscErrorCode ierr;
511   PetscScalar    zero = 0.0;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(yy,zero);CHKERRQ(ierr);
515   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
516   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
517   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
518   PetscFunctionReturn(0);
519 }
520 
521 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
522 {
523   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
524   PetscErrorCode ierr;
525 
526   PetscFunctionBegin;
527   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
528   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
529   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
530   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
531   PetscFunctionReturn(0);
532 }
533 
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  /* number of diagonal entries stored in the local row block */
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* column offset (in the column-major local array) of the first diagonal entry.
     NOTE(review): uses m as the leading dimension — assumes aloc->lda == m, confirm */
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
555 
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* tear down the parallel pieces: the stash, the inner sequential matrix,
     and the scatter machinery used by MatMult */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove every method that was composed onto this object so no stale
     function pointers remain if the type is changed later */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
593 
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* uniprocessor case: the local matrix is the whole matrix */
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* rank 0 collects each process's row block in turn and writes it out;
       every block is transposed from column-major storage to the row-major
       layout used by the binary file format */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        /* NOTE(review): walking a->v linearly assumes the local leading
           dimension equals mat->rmap->n — confirm */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          /* receive rank k's block, transpose it, and append to the file */
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks just ship their local block to rank 0 */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
667 
668 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
669 #include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* INFO formats print summary data only and return early */
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    /* nothing to do for a null draw device */
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    /* A is a temporary copy with all rows on rank 0 and zero rows elsewhere */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* each rank pushes its local rows; the stash routes them to rank 0 */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* rank 0 views its (complete) local block through a sub-viewer */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
751 
752 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
753 {
754   PetscErrorCode ierr;
755   PetscBool      iascii,isbinary,isdraw,issocket;
756 
757   PetscFunctionBegin;
758   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
759   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
760   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
761   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
762 
763   if (iascii || issocket || isdraw) {
764     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
765   } else if (isbinary) {
766     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
767   }
768   PetscFunctionReturn(0);
769 }
770 
771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772 {
773   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
774   Mat            mdn  = mat->A;
775   PetscErrorCode ierr;
776   PetscReal      isend[5],irecv[5];
777 
778   PetscFunctionBegin;
779   info->block_size = 1.0;
780 
781   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
782 
783   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
784   isend[3] = info->memory;  isend[4] = info->mallocs;
785   if (flag == MAT_LOCAL) {
786     info->nz_used      = isend[0];
787     info->nz_allocated = isend[1];
788     info->nz_unneeded  = isend[2];
789     info->memory       = isend[3];
790     info->mallocs      = isend[4];
791   } else if (flag == MAT_GLOBAL_MAX) {
792     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
793 
794     info->nz_used      = irecv[0];
795     info->nz_allocated = irecv[1];
796     info->nz_unneeded  = irecv[2];
797     info->memory       = irecv[3];
798     info->mallocs      = irecv[4];
799   } else if (flag == MAT_GLOBAL_SUM) {
800     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
801 
802     info->nz_used      = irecv[0];
803     info->nz_allocated = irecv[1];
804     info->nz_unneeded  = irecv[2];
805     info->memory       = irecv[3];
806     info->mallocs      = irecv[4];
807   }
808   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
809   info->fill_ratio_needed = 0;
810   info->factor_mallocs    = 0;
811   PetscFunctionReturn(0);
812 }
813 
814 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
815 {
816   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
817   PetscErrorCode ierr;
818 
819   PetscFunctionBegin;
820   switch (op) {
821   case MAT_NEW_NONZERO_LOCATIONS:
822   case MAT_NEW_NONZERO_LOCATION_ERR:
823   case MAT_NEW_NONZERO_ALLOCATION_ERR:
824     MatCheckPreallocated(A,1);
825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
826     break;
827   case MAT_ROW_ORIENTED:
828     MatCheckPreallocated(A,1);
829     a->roworiented = flg;
830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
831     break;
832   case MAT_NEW_DIAGONALS:
833   case MAT_KEEP_NONZERO_PATTERN:
834   case MAT_USE_HASH_TABLE:
835     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
836     break;
837   case MAT_IGNORE_OFF_PROC_ENTRIES:
838     a->donotstash = flg;
839     break;
840   case MAT_SYMMETRIC:
841   case MAT_STRUCTURALLY_SYMMETRIC:
842   case MAT_HERMITIAN:
843   case MAT_SYMMETRY_ETERNAL:
844   case MAT_IGNORE_LOWER_TRIANGULAR:
845     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
846     break;
847   default:
848     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
849   }
850   PetscFunctionReturn(0);
851 }
852 
853 
854 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
855 {
856   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
857   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
858   const PetscScalar *l,*r;
859   PetscScalar       x,*v;
860   PetscErrorCode    ierr;
861   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
862 
863   PetscFunctionBegin;
864   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
865   if (ll) {
866     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
867     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
868     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
869     for (i=0; i<m; i++) {
870       x = l[i];
871       v = mat->v + i;
872       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
873     }
874     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
875     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
876   }
877   if (rr) {
878     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
879     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
880     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
883     for (i=0; i<n; i++) {
884       x = r[i];
885       v = mat->v + i*m;
886       for (j=0; j<m; j++) (*v++) *= x;
887     }
888     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
889     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
890   }
891   PetscFunctionReturn(0);
892 }
893 
/*
   MatNorm_MPIDense - Computes the requested norm of A.

   The local block mdn->A holds this process's rows across ALL global
   columns (see MatMPIDenseSetPreallocation_MPIDense), therefore:
     NORM_FROBENIUS - allreduce (sum) the local sums |a_ij|^2, then sqrt;
     NORM_1         - local per-column absolute sums are partial (over local
                      rows only); allreduce with SUM, then take the max over
                      all N columns;
     NORM_INFINITY  - each row is wholly local, so the local max row sum is
                      reduced with MAX.
   The two norm (largest singular value) is not supported.  On one process
   everything is delegated to the SeqDense kernel.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* walk the whole local array; storage order is irrelevant here */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      /* column-major local block: accumulate each column's absolute sum */
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      /* the 1-norm is the largest of the global column sums */
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
940 
941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
942 {
943   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
944   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
945   Mat            B;
946   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
947   PetscErrorCode ierr;
948   PetscInt       j,i;
949   PetscScalar    *v;
950 
951   PetscFunctionBegin;
952   if (reuse == MAT_INPLACE_MATRIX  && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
953   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
954     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
955     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
956     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
957     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
958   } else {
959     B = *matout;
960   }
961 
962   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
963   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
964   for (i=0; i<m; i++) rwork[i] = rstart + i;
965   for (j=0; j<n; j++) {
966     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
967     v   += m;
968   }
969   ierr = PetscFree(rwork);CHKERRQ(ierr);
970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
973     *matout = B;
974   } else {
975     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 
981 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
982 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
983 
984 PetscErrorCode MatSetUp_MPIDense(Mat A)
985 {
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
994 {
995   PetscErrorCode ierr;
996   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
997 
998   PetscFunctionBegin;
999   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1000   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1005 {
1006   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 PetscErrorCode MatRealPart_MPIDense(Mat A)
1015 {
1016   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1025 {
1026   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1027   PetscErrorCode ierr;
1028 
1029   PetscFunctionBegin;
1030   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1035 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1036 {
1037   PetscErrorCode ierr;
1038   PetscInt       i,n;
1039   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1040   PetscReal      *work;
1041 
1042   PetscFunctionBegin;
1043   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1044   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1045   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1046   if (type == NORM_2) {
1047     for (i=0; i<n; i++) work[i] *= work[i];
1048   }
1049   if (type == NORM_INFINITY) {
1050     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1051   } else {
1052     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1053   }
1054   ierr = PetscFree(work);CHKERRQ(ierr);
1055   if (type == NORM_2) {
1056     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1062 {
1063   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1064   PetscErrorCode ierr;
1065   PetscScalar    *a;
1066   PetscInt       m,n,i;
1067 
1068   PetscFunctionBegin;
1069   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1070   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1071   for (i=0; i<m*n; i++) {
1072     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1073   }
1074   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1079 
/*
   MatMissingDiagonal_MPIDense - Dense storage always holds every diagonal
   entry, so no diagonal can be missing; d is intentionally left untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1086 
1087 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - operation dispatch table for MATMPIDENSE, copied into
   mat->ops by MatCreate_MPIDense().  The bracketed numbers mark the slot
   index within struct _MatOps; a 0 entry means the operation is not
   implemented for this type.
*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
/* dense-dense products are only available through the Elemental interface */
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};
1236 
1237 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1238 {
1239   Mat_MPIDense   *a;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   mat->preallocated = PETSC_TRUE;
1244   /* Note:  For now, when data is specified above, this assumes the user correctly
1245    allocates the local dense storage space.  We should add error checking. */
1246 
1247   a       = (Mat_MPIDense*)mat->data;
1248   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1249   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1250   a->nvec = mat->cmap->n;
1251 
1252   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1253   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1254   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1255   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1256   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to
   MATELEMENTAL by inserting each process's local rows (spanning all N
   global columns) into a new, reused, or (for MAT_INPLACE_MATRIX)
   header-replacing Elemental matrix.  The newtype argument is unused:
   the target type is fixed to MATELEMENTAL.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the caller's Elemental matrix; ADD_VALUES below requires zeroing first */
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the local dense array is column-major, so insert column-oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* global indices of this process's rows, and all global columns */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* destroy A's old contents and take over its header */
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1300 
1301 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1302 {
1303   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1304   PetscErrorCode ierr;
1305 
1306   PetscFunctionBegin;
1307   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1312 {
1313   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE: allocates the
   type-specific data, installs the operation table and the value stash,
   and composes the by-name methods (dense array access, preallocation,
   AIJ*Dense products, Elemental conversion).  The local SeqDense block is
   created later, by MatMPIDenseSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* by-name methods resolved through PetscUseMethod/PetscTryMethod */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1367 
1368 /*MC
1369    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1370 
1371    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1372    and MATMPIDENSE otherwise.
1373 
1374    Options Database Keys:
1375 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1376 
1377   Level: beginner
1378 
1379 
.seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1381 M*/
1382 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective on Mat

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.  When data is supplied, the caller retains ownership of
   the array; it is not freed when the matrix is destroyed.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense() when B is
     MATMPIDENSE; PetscTryMethod makes this a no-op for other types */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1415 
1416 /*@
1417    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1418    array provided by the user. This is useful to avoid copying an array
1419    into a matrix
1420 
1421    Not Collective
1422 
1423    Input Parameters:
1424 +  mat - the matrix
1425 -  array - the array in column major order
1426 
1427    Notes:
1428    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1429    freed when the matrix is destroyed.
1430 
1431    Level: developer
1432 
1433 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1434 
1435 @*/
1436 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1437 {
1438   PetscErrorCode ierr;
1439   PetscFunctionBegin;
1440   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1441   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 /*@
1446    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1447 
1448    Not Collective
1449 
1450    Input Parameters:
1451 .  mat - the matrix
1452 
1453    Notes:
1454    You can only call this after a call to MatDensePlaceArray()
1455 
1456    Level: developer
1457 
1458 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1459 
1460 @*/
1461 PetscErrorCode  MatDenseResetArray(Mat mat)
1462 {
1463   PetscErrorCode ierr;
1464   PetscFunctionBegin;
1465   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1466   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 /*@C
1471    MatCreateDense - Creates a parallel matrix in dense format.
1472 
1473    Collective on MPI_Comm
1474 
1475    Input Parameters:
1476 +  comm - MPI communicator
1477 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1478 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1479 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1480 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1481 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1482    to control all matrix memory allocation.
1483 
1484    Output Parameter:
1485 .  A - the matrix
1486 
1487    Notes:
1488    The dense format is fully compatible with standard Fortran 77
1489    storage by columns.
1490 
1491    The data input variable is intended primarily for Fortran programmers
1492    who wish to allocate their own matrix memory space.  Most users should
1493    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1494 
1495    The user MUST specify either the local or global matrix dimensions
1496    (possibly both).
1497 
1498    Level: intermediate
1499 
1500 .keywords: matrix,dense, parallel
1501 
1502 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1503 @*/
1504 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1505 {
1506   PetscErrorCode ierr;
1507   PetscMPIInt    size;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1511   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1512   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1513   if (size > 1) {
1514     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1515     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1516     if (data) {  /* user provided data array, so no need to assemble */
1517       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1518       (*A)->assembled = PETSC_TRUE;
1519     }
1520   } else {
1521     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1522     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
/* Duplicate an MPIDENSE matrix: build a new matrix of the same type and layout,
   copy the parallel bookkeeping fields, and duplicate the local sequential block
   (copying values or not according to cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  /* copy the full operations table AFTER MatSetType, so any ops A overrode survive */
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  /* replicate the parallel state of the original */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts by reference rather than copying them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* set up scatter context, then duplicate the local dense block */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
1561 
/* Load a matrix stored on disk in DENSE binary format (row-major values, no
   sparsity metadata).  Rank 0 reads the whole file and ships each process its
   own row block; every rank then transposes its block from the file's row-major
   order into the column-major storage used by dense matrices.
   NOTE(review): assumes the file layout matches the row ownership computed here
   (rank i's rows are contiguous in the file) -- verify against the writer. */
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  /* preallocate only if the user has not already supplied storage */
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* rank 0 sizes its staging buffer for the largest row block of any rank */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;   /* row-major file -> column-major storage */
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;   /* row-major file -> column-major storage */
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1628 
/* Load an MPIDENSE matrix from a PETSc binary viewer.  The file may contain
   either a dense dump (delegated to MatLoad_MPIDense_DenseInFile) or a sparse
   AIJ-style dump (header, row lengths, column indices, values), in which case
   rank 0 reads everything and ships each process its portion, and entries are
   inserted with MatSetValues. */
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* only rank 0 touches the file; the header is broadcast to everyone */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    /* default split: M/size rows each, the remainder spread over the low ranks */
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  /* build the row-ownership prefix sums: rowners[i]..rowners[i+1] are rank i's rows */
  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array since the blocks are contiguous */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    /* sanity check: the message length must match the sum of our row lengths */
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  /* preallocate only if the user has not already supplied storage */
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1809 
1810 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1811 {
1812   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1813   Mat            a,b;
1814   PetscBool      flg;
1815   PetscErrorCode ierr;
1816 
1817   PetscFunctionBegin;
1818   a    = matA->A;
1819   b    = matB->A;
1820   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1821   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1826 {
1827   PetscErrorCode        ierr;
1828   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1829   Mat_TransMatMultDense *atb = a->atbdense;
1830 
1831   PetscFunctionBegin;
1832   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1833   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1834   ierr = PetscFree(atb);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1839 {
1840   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1841   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1842   Mat_TransMatMultDense *atb = c->atbdense;
1843   PetscErrorCode ierr;
1844   MPI_Comm       comm;
1845   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1846   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1847   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1848   PetscScalar    _DOne=1.0,_DZero=0.0;
1849   PetscBLASInt   am,an,bn,aN;
1850   const PetscInt *ranges;
1851 
1852   PetscFunctionBegin;
1853   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1854   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1855   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1856 
1857   /* compute atbarray = aseq^T * bseq */
1858   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1859   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1860   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1861   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1862   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1863 
1864   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1865   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1866 
1867   /* arrange atbarray into sendbuf */
1868   k = 0;
1869   for (proc=0; proc<size; proc++) {
1870     for (j=0; j<cN; j++) {
1871       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1872     }
1873   }
1874   /* sum all atbarray to local values of C */
1875   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1876   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1877   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
/* Symbolic phase of C = A^T * B for MPIDENSE matrices: create the (dense) product
   matrix C, allocate the communication/work buffers used by the numeric phase,
   and splice a custom destroy routine into C so those buffers are released with it. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to share the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense: each rank's full cM x cN contribution
     plus a pack buffer of the same size and the reduce-scatter receive counts */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM = Cdense->rmap->N;
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  /* save the original destroy routine so the custom one can chain to it */
  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
1918 
1919 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1920 {
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   if (scall == MAT_INITIAL_MATRIX) {
1925     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1926   }
1927   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1932 {
1933   PetscErrorCode   ierr;
1934   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1935   Mat_MatMultDense *ab = a->abdense;
1936 
1937   PetscFunctionBegin;
1938   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1939   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1940   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1941 
1942   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1943   ierr = PetscFree(ab);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: refresh the cached Elemental copies
   of A and B (created by the symbolic phase), multiply in Elemental format,
   and convert the product back into the existing C in place. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  /* MAT_REUSE_MATRIX: overwrite the values of the cached Ae/Be rather than rebuilding them */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1961 
1962 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1963 {
1964   PetscErrorCode   ierr;
1965   Mat              Ae,Be,Ce;
1966   Mat_MPIDense     *c;
1967   Mat_MatMultDense *ab;
1968 
1969   PetscFunctionBegin;
1970   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1971     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1972   }
1973 
1974   /* convert A and B to Elemental matrices Ae and Be */
1975   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1976   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1977 
1978   /* Ce = Ae*Be */
1979   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1980   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1981 
1982   /* convert Ce to C */
1983   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1984 
1985   /* create data structure for reuse Cdense */
1986   ierr = PetscNew(&ab);CHKERRQ(ierr);
1987   c                  = (Mat_MPIDense*)(*C)->data;
1988   c->abdense         = ab;
1989 
1990   ab->Ae             = Ae;
1991   ab->Be             = Be;
1992   ab->Ce             = Ce;
1993   ab->destroy        = (*C)->ops->destroy;
1994   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1995   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1996   PetscFunctionReturn(0);
1997 }
1998 
/* Driver for C = A*B via Elemental: the first (MAT_INITIAL_MATRIX) call runs
   the symbolic phase, which already computes the values; subsequent
   (MAT_REUSE_MATRIX) calls run only the numeric phase. */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2015 #endif
2016 
2017