xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5186ea8cb7a1d2a3650ca1d6646b65f27423248f)

/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For a sequential matrix it returns the matrix itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

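/*
   Usage sketch for MatDenseGetLocalMatrix (illustrative commentary, not part
   of the original source). It assumes A is an assembled MATMPIDENSE matrix:

      Mat      Aloc;
      PetscInt m,N;

      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetSize(Aloc,&m,&N);CHKERRQ(ierr);

   Each rank obtains its m x N local block (m local rows, all N columns).
   Aloc is a reference to the inner matrix, not a copy, so the caller must
   not destroy it.
*/
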
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

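/*
   Layout note (added commentary): the local block mdn->A above is an m x N
   column-major array with lda == m, so global entry (rstart+i,j) is stored at
   array[i + m*j]. The square diagonal block, i.e. columns rstart through
   rstart+m-1, therefore starts at array + m*rstart, which is why the
   preallocation call can alias the existing storage instead of copying it.
*/
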
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

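/*
   Assembly sketch (illustrative commentary): values destined for rows owned
   by another rank are parked in mat->stash above and are delivered during
   assembly, so the usual calling sequence is

      ierr = MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where the global indices grow and gcol need not be local to the calling
   rank, unless MAT_IGNORE_OFF_PROC_ENTRIES has been set.
*/
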
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK. Currently only submatrices with the same row partitioning as
     the original matrix are supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

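/*
   Copy note (added commentary): both local blocks above are column-major, so
   the extraction loop walks the Ncols selected source columns (stride lda
   between columns of av) and writes the gathered rows contiguously into bv,
   which relies on the destination block having lda == nrows.
*/
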
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

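/*
   Consistency-check note (added commentary): NOT_SET_VALUES, INSERT_VALUES
   and ADD_VALUES have the integer values 0, 1 and 2, so the bitwise OR of the
   insert modes over all ranks equals (ADD_VALUES|INSERT_VALUES) == 3 exactly
   when some ranks inserted while others added, the mixed case rejected above.
*/
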
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access the
   local storage of mdn->A directly and not go through the
   MatZeroRows() routine.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

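/*
   Communication note (added commentary): MatZeroRows_MPIDense above uses the
   classic PETSc rendezvous pattern. Each rank counts how many of its
   requested rows fall on each owner (sizes[]), PetscMaxSum() tells every rank
   how many messages to expect (nrecvs) and the largest message length (nmax),
   receives are posted with MPI_ANY_SOURCE, the row lists go out with
   MPI_Isend, and MPI_Waitany drains the receives in completion order.
*/
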
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

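/*
   Multiply note (added commentary): each rank stores m complete rows of A
   (all N columns), so y = A*x first gathers the entire input vector into the
   work vector lvec with the Mvctx scatter and then applies one sequential
   multiply. The transpose products run the same scatter in reverse: the local
   transpose-multiply produces a length-N partial result in lvec that is
   summed into the distributed output with ADD_VALUES.
*/
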
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

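/*
   Indexing note (added commentary): assuming the local block is stored with
   lda == m, global diagonal entry rstart+i lives in local row i of global
   column rstart+i, i.e. at offset (rstart+i)*m + i = radd + i*m + i, which is
   the expression used in the loop above.
*/
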
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerPushFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

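/*
   Format note (added commentary): the local blocks are stored column-major
   while MATRIX_BINARY_FORMAT_DENSE is written row by row, so the
   work[j + i*N] loops above transpose one rank's block at a time, with rank 0
   receiving each remote block over MPI before writing it to the file.
*/
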
extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %D != %D.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

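/*
   Scaling note (added commentary): left scaling touches only the locally
   owned entries of ll, since local row i of the block is scaled by l[i] (the
   entries of a row are m apart in memory). Right scaling needs every entry of
   rr because each rank stores all N columns, so rr is first gathered into
   lvec through the Mvctx scatter and each local column j is scaled by r[j].
*/
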
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

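/*
   Norm note (added commentary): since each rank owns complete rows,
   NORM_FROBENIUS sums |a_ij|^2 locally and takes the square root after a
   global sum; NORM_1 (max column sum) accumulates per-column absolute sums
   locally and reduces the full length-N vector with MPIU_SUM before taking
   the maximum; NORM_INFINITY (max row sum) is a local MatNorm followed by a
   MPIU_MAX reduction.
*/
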
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

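/*
   Reduction note (added commentary): the 2-norm is not additive across the
   row distribution, so the local column 2-norms are squared first, summed
   over all ranks, and square-rooted at the end; NORM_1 sums the local column
   sums directly and NORM_INFINITY reduces with the maximum.
*/
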
static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
#if defined(PETSC_HAVE_ELEMENTAL)
                                /* 89*/ MatMatMult_MPIDense_MPIDense,
                                        MatMatMultSymbolic_MPIDense_MPIDense,
#else
                                /* 89*/ 0,
                                        0,
#endif
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        MatTransposeMatMult_MPIDense_MPIDense,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
};

PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is specified, this assumes the user has correctly
     allocated the local dense storage space. We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

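/*
   Preallocation sketch (illustrative commentary): passing NULL lets PETSc
   allocate the local m x N column-major block; passing a pointer makes the
   matrix use caller-owned storage of at least m*N scalars with lda == m, e.g.

      Mat         A;
      PetscScalar *v;

      ierr = PetscMalloc1(m*N,&v);CHKERRQ(ierr);
      ierr = MatCreateDense(comm,m,n,M,N,v,&A);CHKERRQ(ierr);

   with m,n the local and M,N the global dimensions; the caller must keep v
   alive for the lifetime of A and free it afterwards.
*/
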
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1328 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1329 {
1330   PetscErrorCode ierr;
1331   Mat_MPIDense   *mat;
1332   PetscInt       m,nloc,N;
1333 
1334   PetscFunctionBegin;
1335   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1336   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1337   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1338     PetscInt sum;
1339 
1340     if (n == PETSC_DECIDE) {
1341       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1342     }
1343     /* Check sum(n) = N */
1344     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1345     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1346 
1347     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1348   }
1349 
1350   /* numeric phase */
1351   mat = (Mat_MPIDense*)(*outmat)->data;
1352   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1353   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1354   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
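
/*
   A minimal usage sketch (hedged; seq is an illustrative per-process
   MATSEQDENSE block of size m x N): stack the local blocks into one
   distributed matrix.

       Mat dist;
       ierr = MatCreateMPIMatConcatenateSeqMat(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   Passing MAT_REUSE_MATRIX on later calls skips the symbolic phase and
   re-runs only the numeric copy above.
*/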
1357 
1358 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1359 {
1360   Mat_MPIDense   *a;
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1365   mat->data = (void*)a;
1366   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1367 
1368   mat->insertmode = NOT_SET_VALUES;
1369   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1370   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1371 
1372   /* build cache for off-process entries formed */
1373   a->donotstash = PETSC_FALSE;
1374 
1375   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1376 
1377   /* stuff used for matrix vector multiply */
1378   a->lvec        = 0;
1379   a->Mvctx       = 0;
1380   a->roworiented = PETSC_TRUE;
1381 
1382   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1383   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1384   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1385   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1386   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1387   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1388 #if defined(PETSC_HAVE_ELEMENTAL)
1389   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1390 #endif
1391   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1392   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1393   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1394   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1395 
1396   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1397   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1398   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1399   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1400   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1401   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 /*MC
1406    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1407 
1408    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1409    and MATMPIDENSE otherwise.
1410 
1411    Options Database Keys:
1412 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1413 
1414   Level: beginner
1415 
1416 
1417 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1418 M*/
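
/*
   A minimal sketch of selecting this type from the options database (run
   the program with -mat_type dense; M and N are illustrative global sizes):

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);
       ierr = MatSetUp(A);CHKERRQ(ierr);
*/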
1419 
1420 /*@C
1421    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1422 
1423    Not Collective
1424 
1425    Input Parameters:
1426 +  B - the matrix
1427 -  data - optional location of matrix data.  Set data=NULL for PETSc
1428    to control all matrix memory allocation.
1429 
1430    Notes:
1431    The dense format is fully compatible with standard Fortran 77
1432    storage by columns.
1433 
1434    The data input variable is intended primarily for Fortran programmers
1435    who wish to allocate their own matrix memory space.  Most users should
1436    set data=NULL.
1437 
1438    Level: intermediate
1439 
1440 .keywords: matrix, dense, parallel
1441 
1442 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1443 @*/
1444 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1445 {
1446   PetscErrorCode ierr;
1447 
1448   PetscFunctionBegin;
1449   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
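
/*
   A minimal usage sketch: the usual create/set-type/preallocate sequence,
   letting PETSc own the storage. If a data array is supplied instead of
   NULL, then (by the layout used in this file) it must hold local rows
   times global columns in column-major order and outlive the matrix.

       Mat B;
       ierr = MatCreate(comm,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
*/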
1452 
1453 /*@
1454    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1455    array provided by the user. This is useful to avoid copying an array
1456    into a matrix.
1457 
1458    Not Collective
1459 
1460    Input Parameters:
1461 +  mat - the matrix
1462 -  array - the array in column major order
1463 
1464    Notes:
1465    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1466    freed when the matrix is destroyed.
1467 
1468    Level: developer
1469 
1470 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1471 
1472 @*/
1473 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1474 {
1475   PetscErrorCode ierr;
1476   PetscFunctionBegin;
1477   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1478   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /*@
1483    MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()
1484 
1485    Not Collective
1486 
1487    Input Parameter:
1488 .  mat - the matrix
1489 
1490    Notes:
1491    You can only call this after a call to MatDensePlaceArray()
1492 
1493    Level: developer
1494 
1495 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1496 
1497 @*/
1498 PetscErrorCode  MatDenseResetArray(Mat mat)
1499 {
1500   PetscErrorCode ierr;
1501   PetscFunctionBegin;
1502   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1503   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
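
/*
   A minimal sketch of the place/reset pairing documented above (myvals is
   an illustrative user-owned buffer of local-rows x global-columns scalars
   in column-major order):

       PetscScalar *myvals;
       ierr = PetscMalloc1(m*N,&myvals);CHKERRQ(ierr);
       ierr = MatDensePlaceArray(A,myvals);CHKERRQ(ierr);
       ... operations on A now read and write myvals ...
       ierr = MatDenseResetArray(A);CHKERRQ(ierr);   /* back to the original storage */
       ierr = PetscFree(myvals);CHKERRQ(ierr);
*/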
1506 
1507 /*@C
1508    MatCreateDense - Creates a parallel matrix in dense format.
1509 
1510    Collective on MPI_Comm
1511 
1512    Input Parameters:
1513 +  comm - MPI communicator
1514 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1515 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1516 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1517 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1518 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1519    to control all matrix memory allocation.
1520 
1521    Output Parameter:
1522 .  A - the matrix
1523 
1524    Notes:
1525    The dense format is fully compatible with standard Fortran 77
1526    storage by columns.
1527 
1528    The data input variable is intended primarily for Fortran programmers
1529    who wish to allocate their own matrix memory space.  Most users should
1530    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1531 
1532    The user MUST specify either the local or global matrix dimensions
1533    (possibly both).
1534 
1535    Level: intermediate
1536 
1537 .keywords: matrix, dense, parallel
1538 
1539 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1540 @*/
1541 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1542 {
1543   PetscErrorCode ierr;
1544   PetscMPIInt    size;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1548   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1549   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1550   if (size > 1) {
1551     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1552     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1553     if (data) {  /* user provided data array, so no need to assemble */
1554       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1555       (*A)->assembled = PETSC_TRUE;
1556     }
1557   } else {
1558     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1559     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1560   }
1561   PetscFunctionReturn(0);
1562 }
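
/*
   A minimal usage sketch: create a distributed dense matrix with PETSc
   choosing the layout and owning the storage, then fill and assemble it.

       Mat A;
       ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
       /* ... MatSetValues(A,...) on locally owned rows ... */
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/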
1563 
1564 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1565 {
1566   Mat            mat;
1567   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1568   PetscErrorCode ierr;
1569 
1570   PetscFunctionBegin;
1571   *newmat = 0;
1572   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1573   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1574   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1575   a       = (Mat_MPIDense*)mat->data;
1576 
1577   mat->factortype   = A->factortype;
1578   mat->assembled    = PETSC_TRUE;
1579   mat->preallocated = PETSC_TRUE;
1580 
1581   a->size         = oldmat->size;
1582   a->rank         = oldmat->rank;
1583   mat->insertmode = NOT_SET_VALUES;
1584   a->nvec         = oldmat->nvec;
1585   a->donotstash   = oldmat->donotstash;
1586 
1587   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1588   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1589 
1590   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1591   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1592   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1593 
1594   *newmat = mat;
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
1599 {
1600   PetscErrorCode ierr;
1601   PetscMPIInt    rank,size;
1602   const PetscInt *rowners;
1603   PetscInt       i,m,n,nz,j,mMax;
1604   PetscScalar    *array,*vals,*vals_ptr;
1605   Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;
1606 
1607   PetscFunctionBegin;
1608   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1609   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1610 
1611   /* determine ownership of rows and columns */
1612   m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
1613   n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;
1614 
1615   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1616   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1617     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1618   }
1619   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1620   ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
1621   ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
1622   ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1623   if (!rank) {
1624     ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);
1625 
1626     /* read in my part of the matrix numerical values  */
1627     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1628 
1629     /* insert into matrix by row (this is why we cannot read directly into array) */
1630     vals_ptr = vals;
1631     for (i=0; i<m; i++) {
1632       for (j=0; j<N; j++) {
1633         array[i + j*m] = *vals_ptr++;
1634       }
1635     }
1636 
1637     /* read in the other processes' parts and ship them out */
1638     for (i=1; i<size; i++) {
1639       nz   = (rowners[i+1] - rowners[i])*N;
1640       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1641       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1642     }
1643   } else {
1644     /* receive numeric values */
1645     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1646 
1647     /* receive message of values */
1648     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1649 
1650     /* insert into matrix by row (this is why we cannot read directly into array) */
1651     vals_ptr = vals;
1652     for (i=0; i<m; i++) {
1653       for (j=0; j<N; j++) {
1654         array[i + j*m] = *vals_ptr++;
1655       }
1656     }
1657   }
1658   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1659   ierr = PetscFree(vals);CHKERRQ(ierr);
1660   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1661   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1666 {
1667   Mat_MPIDense   *a;
1668   PetscScalar    *vals,*svals;
1669   MPI_Comm       comm;
1670   MPI_Status     status;
1671   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
1672   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1673   PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
1674   PetscInt       i,nz,j,rstart,rend;
1675   int            fd;
1676   PetscBool      isbinary;
1677   PetscErrorCode ierr;
1678 
1679   PetscFunctionBegin;
1680   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1681   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
1682 
1683   /* force binary viewer to load .info file if it has not yet done so */
1684   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1685   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1686   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1687   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1688   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1689   if (!rank) {
1690     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1691     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1692   }
1693   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1694   M    = header[1]; N = header[2]; nz = header[3];
1695 
1696   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1697   if (newmat->rmap->N < 0) newmat->rmap->N = M;
1698   if (newmat->cmap->N < 0) newmat->cmap->N = N;
1699 
1700   if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
1701   if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);
1702 
1703   /*
1704        Handle case where matrix is stored on disk as a dense matrix
1705   */
1706   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1707     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
1708     PetscFunctionReturn(0);
1709   }
1710 
1711   /* determine ownership of all rows */
1712   if (newmat->rmap->n < 0) {
1713     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1714   } else {
1715     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1716   }
1717   if (newmat->cmap->n < 0) {
1718     n = PETSC_DECIDE;
1719   } else {
1720     ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
1721   }
1722 
1723   ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
1724   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1725   rowners[0] = 0;
1726   for (i=2; i<=size; i++) {
1727     rowners[i] += rowners[i-1];
1728   }
1729   rstart = rowners[rank];
1730   rend   = rowners[rank+1];
1731 
1732   /* distribute row lengths to all processors */
1733   ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
1734   if (!rank) {
1735     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1736     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1737     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1738     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1739     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1740     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1741   } else {
1742     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1743   }
1744 
1745   if (!rank) {
1746     /* calculate the number of nonzeros on each processor */
1747     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1748     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1749     for (i=0; i<size; i++) {
1750       for (j=rowners[i]; j< rowners[i+1]; j++) {
1751         procsnz[i] += rowlengths[j];
1752       }
1753     }
1754     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1755 
1756     /* determine max buffer needed and allocate it */
1757     maxnz = 0;
1758     for (i=0; i<size; i++) {
1759       maxnz = PetscMax(maxnz,procsnz[i]);
1760     }
1761     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1762 
1763     /* read in my part of the matrix column indices  */
1764     nz   = procsnz[0];
1765     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1766     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1767 
1768     /* read in everyone else's parts and ship them off */
1769     for (i=1; i<size; i++) {
1770       nz   = procsnz[i];
1771       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1772       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1773     }
1774     ierr = PetscFree(cols);CHKERRQ(ierr);
1775   } else {
1776     /* determine buffer space needed for message */
1777     nz = 0;
1778     for (i=0; i<m; i++) {
1779       nz += ourlens[i];
1780     }
1781     ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);
1782 
1783     /* receive message of column indices */
1784     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1785     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1786     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1787   }
1788 
1789   ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
1790   a = (Mat_MPIDense*)newmat->data;
1791   if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
1792     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1793   }
1794 
1795   if (!rank) {
1796     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1797 
1798     /* read in my part of the matrix numerical values  */
1799     nz   = procsnz[0];
1800     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1801 
1802     /* insert into matrix */
1803     jj      = rstart;
1804     smycols = mycols;
1805     svals   = vals;
1806     for (i=0; i<m; i++) {
1807       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1808       smycols += ourlens[i];
1809       svals   += ourlens[i];
1810       jj++;
1811     }
1812 
1813     /* read in the other processes' parts and ship them out */
1814     for (i=1; i<size; i++) {
1815       nz   = procsnz[i];
1816       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1817       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1818     }
1819     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1820   } else {
1821     /* receive numeric values */
1822     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
1823 
1824     /* receive message of values */
1825     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1826     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1827     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1828 
1829     /* insert into matrix */
1830     jj      = rstart;
1831     smycols = mycols;
1832     svals   = vals;
1833     for (i=0; i<m; i++) {
1834       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1835       smycols += ourlens[i];
1836       svals   += ourlens[i];
1837       jj++;
1838     }
1839   }
1840   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1841   ierr = PetscFree(vals);CHKERRQ(ierr);
1842   ierr = PetscFree(mycols);CHKERRQ(ierr);
1843   ierr = PetscFree(rowners);CHKERRQ(ierr);
1844 
1845   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1846   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
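
/*
   A minimal usage sketch of the loader above ("mat.dat" is an illustrative
   file previously written with MatView() on a binary viewer):

       PetscViewer viewer;
       Mat         A;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
       ierr = MatLoad(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/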
1849 
1850 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1851 {
1852   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1853   Mat            a,b;
1854   PetscBool      flg;
1855   PetscErrorCode ierr;
1856 
1857   PetscFunctionBegin;
1858   a    = matA->A;
1859   b    = matB->A;
1860   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1861   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
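
/*
   Usage note (a sketch): MatEqual() is collective here; each process
   compares its local block and the results are combined with a logical AND.

       PetscBool eq;
       ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
*/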
1864 
1865 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1866 {
1867   PetscErrorCode        ierr;
1868   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1869   Mat_TransMatMultDense *atb = a->atbdense;
1870 
1871   PetscFunctionBegin;
1872   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1873   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1874   ierr = PetscFree(atb);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1879 {
1880   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1881   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1882   Mat_TransMatMultDense *atb = c->atbdense;
1883   PetscErrorCode ierr;
1884   MPI_Comm       comm;
1885   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1886   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1887   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1888   PetscScalar    _DOne=1.0,_DZero=0.0;
1889   PetscBLASInt   am,an,bn,aN;
1890   const PetscInt *ranges;
1891 
1892   PetscFunctionBegin;
1893   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1894   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1895   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1896 
1897   /* compute atbarray = aseq^T * bseq */
1898   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1899   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1900   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1901   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1902   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1903 
1904   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1905   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1906 
1907   /* arrange atbarray into sendbuf */
1908   k = 0;
1909   for (proc=0; proc<size; proc++) {
1910     for (j=0; j<cN; j++) {
1911       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1912     }
1913   }
1914   /* sum the contributions in atbarray into the local values of C */
1915   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1916   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1917   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1922 {
1923   PetscErrorCode        ierr;
1924   Mat                   Cdense;
1925   MPI_Comm              comm;
1926   PetscMPIInt           size;
1927   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1928   Mat_MPIDense          *c;
1929   Mat_TransMatMultDense *atb;
1930 
1931   PetscFunctionBegin;
1932   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1933   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1934     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1935   }
1936 
1937   /* create matrix product Cdense */
1938   ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
1939   ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1940   ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
1941   ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
1942   ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1943   ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1944   *C   = Cdense;
1945 
1946   /* create data structure for reusing Cdense */
1947   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1948   ierr = PetscNew(&atb);CHKERRQ(ierr);
1949   cM = Cdense->rmap->N;
1950   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1951 
1952   c                    = (Mat_MPIDense*)Cdense->data;
1953   c->atbdense          = atb;
1954   atb->destroy         = Cdense->ops->destroy;
1955   Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   if (scall == MAT_INITIAL_MATRIX) {
1965     ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1966   }
1967   ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1968   PetscFunctionReturn(0);
1969 }
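
/*
   A minimal usage sketch: C = A^T * B for two MPIDENSE matrices with the
   same row layout; the first call creates C, later calls reuse it.

       Mat C;
       ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       /* ... modify entries of A or B ... */
       ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/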
1970 
1971 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1972 {
1973   PetscErrorCode   ierr;
1974   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1975   Mat_MatMultDense *ab = a->abdense;
1976 
1977   PetscFunctionBegin;
1978   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1979   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1980   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1981 
1982   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1983   ierr = PetscFree(ab);CHKERRQ(ierr);
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #if defined(PETSC_HAVE_ELEMENTAL)
1988 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1989 {
1990   PetscErrorCode   ierr;
1991   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1992   Mat_MatMultDense *ab=c->abdense;
1993 
1994   PetscFunctionBegin;
1995   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1996   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1997   ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1998   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
2003 {
2004   PetscErrorCode   ierr;
2005   Mat              Ae,Be,Ce;
2006   Mat_MPIDense     *c;
2007   Mat_MatMultDense *ab;
2008 
2009   PetscFunctionBegin;
2010   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2011     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2012   }
2013 
2014   /* convert A and B to Elemental matrices Ae and Be */
2015   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
2016   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
2017 
2018   /* Ce = Ae*Be */
2019   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
2020   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
2021 
2022   /* convert Ce to C */
2023   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
2024 
2025   /* create data structure for reusing Cdense */
2026   ierr = PetscNew(&ab);CHKERRQ(ierr);
2027   c                  = (Mat_MPIDense*)(*C)->data;
2028   c->abdense         = ab;
2029 
2030   ab->Ae             = Ae;
2031   ab->Be             = Be;
2032   ab->Ce             = Ce;
2033   ab->destroy        = (*C)->ops->destroy;
2034   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2035   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
2040 {
2041   PetscErrorCode ierr;
2042 
2043   PetscFunctionBegin;
2044   if (scall == MAT_INITIAL_MATRIX) { /* symbolic product also performs the numeric product */
2045     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2046     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
2047     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
2048   } else {
2049     ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2050     ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
2051     ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
2052   }
2053   PetscFunctionReturn(0);
2054 }
2055 #endif
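
/*
   A minimal usage sketch: C = A*B between MPIDENSE matrices, which the code
   above routes through Elemental (so it requires an Elemental-enabled
   build); the symbolic call already computes the first numeric product.

       Mat C;
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       /* ... update values of A or B ... */
       ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/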
2056 
2057