xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14               matrix that stores the locally owned part of the operator. For a sequential dense matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) {
34     *B = mat->A;
35   } else {
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
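
/*
   A minimal usage sketch, assuming A is an assembled MATMPIDENSE (or MATSEQDENSE) matrix;
   the returned Aloc is a reference to the local block and must not be destroyed by the caller:

     Mat      Aloc;
     PetscInt m,n;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);
*/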
40 
41 #undef __FUNCT__
42 #define __FUNCT__ "MatGetRow_MPIDense"
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "MatRestoreRow_MPIDense"
58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
59 {
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
64   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
71 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
72 {
73   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
74   PetscErrorCode ierr;
75   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
76   PetscScalar    *array;
77   MPI_Comm       comm;
78   Mat            B;
79 
80   PetscFunctionBegin;
81   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
82 
83   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
84   if (!B) {
85     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
86     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
87     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
88     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
89     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
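    /* the local block mdn->A is stored column-major with leading dimension m, so the m x m
       diagonal block (global columns rstart .. rstart+m-1) begins at array + m*rstart */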
90     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
91     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
92     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
93     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
94     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
95     *a = B;
96     ierr = MatDestroy(&B);CHKERRQ(ierr);
97   } else {
98     *a = B;
99   }
100   PetscFunctionReturn(0);
101 }
102 EXTERN_C_END
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatSetValues_MPIDense"
106 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
107 {
108   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
109   PetscErrorCode ierr;
110   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
111   PetscBool      roworiented = A->roworiented;
112 
113   PetscFunctionBegin;
114   if (v) PetscValidScalarPointer(v,6);
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         mat->assembled = PETSC_FALSE;
132         if (roworiented) {
133           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
134         } else {
135           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
136         }
137       }
138     }
139   }
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatGetValues_MPIDense"
145 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
146 {
147   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
148   PetscErrorCode ierr;
149   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
150 
151   PetscFunctionBegin;
152   for (i=0; i<m; i++) {
153     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
154     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
155     if (idxm[i] >= rstart && idxm[i] < rend) {
156       row = idxm[i] - rstart;
157       for (j=0; j<n; j++) {
158         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
159         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
161       }
162     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatDenseGetArray_MPIDense"
169 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
170 {
171   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
172   PetscErrorCode ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 #undef __FUNCT__
180 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
181 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
182 {
183   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
184   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
185   PetscErrorCode ierr;
186   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
187   const PetscInt *irow,*icol;
188   PetscScalar    *av,*bv,*v = lmat->v;
189   Mat            newmat;
190   IS             iscol_local;
191 
192   PetscFunctionBegin;
193   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
199 
200   /* No parallel redistribution currently supported! Should really check each index set
201      to confirm that it is OK.  Currently supports only a submatrix with the same row
202      partitioning as the original matrix! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<Ncols; i++) {
225     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
238   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
239   *B = newmat;
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
245 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
246 {
247   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
248   PetscErrorCode ierr;
249 
250   PetscFunctionBegin;
251   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
257 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
260   MPI_Comm       comm = ((PetscObject)mat)->comm;
261   PetscErrorCode ierr;
262   PetscInt       nstash,reallocs;
263   InsertMode     addv;
264 
265   PetscFunctionBegin;
266   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
267   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
268   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
269   mat->insertmode = addv; /* in case this processor had no cache */
270 
271   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
272   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
273   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
279 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
280 {
281   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
282   PetscErrorCode  ierr;
283   PetscInt        i,*row,*col,flg,j,rstart,ncols;
284   PetscMPIInt     n;
285   PetscScalar     *val;
286   InsertMode      addv=mat->insertmode;
287 
288   PetscFunctionBegin;
289   /*  wait on receives */
290   while (1) {
291     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
292     if (!flg) break;
293 
294     for (i=0; i<n;) {
295       /* Now identify the consecutive vals belonging to the same row */
296       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
297       if (j < n) ncols = j-i;
298       else       ncols = n-i;
299       /* Now assemble all these values with a single function call */
300       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
301       i = j;
302     }
303   }
304   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
305 
306   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
307   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
308 
309   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
310     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatZeroEntries_MPIDense"
317 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
318 {
319   PetscErrorCode ierr;
320   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
321 
322   PetscFunctionBegin;
323   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 /* the code does not do the diagonal entries correctly unless the
328    matrix is square and the column and row ownerships are identical.
329    This is a BUG. The only way to fix it seems to be to access
330    the local dense storage directly and not go through the MatZeroRows()
331    routine.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatZeroRows_MPIDense"
335 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
336 {
337   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
338   PetscErrorCode    ierr;
339   PetscInt          i,*owners = A->rmap->range;
340   PetscInt          *nprocs,j,idx,nsends;
341   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
342   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
343   PetscInt          *lens,*lrows,*values;
344   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
345   MPI_Comm          comm = ((PetscObject)A)->comm;
346   MPI_Request       *send_waits,*recv_waits;
347   MPI_Status        recv_status,*send_status;
348   PetscBool         found;
349   const PetscScalar *xx;
350   PetscScalar       *bb;
351 
352   PetscFunctionBegin;
353   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
354   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
355   /*  first count number of contributors to each processor */
356   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
357   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
358   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
359   for (i=0; i<N; i++) {
360     idx = rows[i];
361     found = PETSC_FALSE;
362     for (j=0; j<size; j++) {
363       if (idx >= owners[j] && idx < owners[j+1]) {
364         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
365       }
366     }
367     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
368   }
369   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
370 
371   /* inform other processors of number of messages and max length*/
372   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
373 
374   /* post receives:   */
375   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
376   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
377   for (i=0; i<nrecvs; i++) {
378     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
379   }
380 
381   /* do sends:
382       1) starts[i] gives the starting index in svalues for stuff going to
383          the ith processor
384   */
385   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
386   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
387   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
388   starts[0]  = 0;
389   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
390   for (i=0; i<N; i++) {
391     svalues[starts[owner[i]]++] = rows[i];
392   }
393 
394   starts[0] = 0;
395   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
396   count = 0;
397   for (i=0; i<size; i++) {
398     if (nprocs[2*i+1]) {
399       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
400     }
401   }
402   ierr = PetscFree(starts);CHKERRQ(ierr);
403 
404   base = owners[rank];
405 
406   /*  wait on receives */
407   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
408   count  = nrecvs;
409   slen   = 0;
410   while (count) {
411     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
412     /* unpack receives into our local space */
413     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
414     source[imdex]  = recv_status.MPI_SOURCE;
415     lens[imdex]    = n;
416     slen += n;
417     count--;
418   }
419   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
420 
421   /* move the data into the send scatter */
422   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
423   count = 0;
424   for (i=0; i<nrecvs; i++) {
425     values = rvalues + i*nmax;
426     for (j=0; j<lens[i]; j++) {
427       lrows[count++] = values[j] - base;
428     }
429   }
430   ierr = PetscFree(rvalues);CHKERRQ(ierr);
431   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
432   ierr = PetscFree(owner);CHKERRQ(ierr);
433   ierr = PetscFree(nprocs);CHKERRQ(ierr);
434 
435   /* fix right hand side if needed */
436   if (x && b) {
437     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
439     for (i=0; i<slen; i++) {
440       bb[lrows[i]] = diag*xx[lrows[i]];
441     }
442     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
443     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
444   }
445 
446   /* actually zap the local rows */
447   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
448   if (diag != 0.0) {
449     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
450     PetscInt      m = ll->lda, i;
451 
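    /* write each diagonal entry directly into the column-major local array: for local row
       lrows[i] the diagonal lies in global column A->cmap->rstart + lrows[i] */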
452     for (i=0; i<slen; i++) {
453       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
454     }
455   }
456   ierr = PetscFree(lrows);CHKERRQ(ierr);
457 
458   /* wait on sends */
459   if (nsends) {
460     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
461     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
462     ierr = PetscFree(send_status);CHKERRQ(ierr);
463   }
464   ierr = PetscFree(send_waits);CHKERRQ(ierr);
465   ierr = PetscFree(svalues);CHKERRQ(ierr);
466 
467   PetscFunctionReturn(0);
468 }
469 
470 #undef __FUNCT__
471 #define __FUNCT__ "MatMult_MPIDense"
472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
473 {
474   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatMultAdd_MPIDense"
486 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
487 {
488   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
489   PetscErrorCode ierr;
490 
491   PetscFunctionBegin;
492   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
494   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatMultTranspose_MPIDense"
500 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
501 {
502   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
503   PetscErrorCode ierr;
504   PetscScalar    zero = 0.0;
505 
506   PetscFunctionBegin;
507   ierr = VecSet(yy,zero);CHKERRQ(ierr);
508   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
509   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
516 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
517 {
518   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
519   PetscErrorCode ierr;
520 
521   PetscFunctionBegin;
522   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
523   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
524   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatGetDiagonal_MPIDense"
531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
532 {
533   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
534   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
535   PetscErrorCode ierr;
536   PetscInt       len,i,n,m = A->rmap->n,radd;
537   PetscScalar    *x,zero = 0.0;
538 
539   PetscFunctionBegin;
540   ierr = VecSet(v,zero);CHKERRQ(ierr);
541   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
542   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
543   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
544   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
545   radd = A->rmap->rstart*m;
546   for (i=0; i<len; i++) {
547     x[i] = aloc->v[radd + i*m + i];
548   }
549   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatDestroy_MPIDense"
555 PetscErrorCode MatDestroy_MPIDense(Mat mat)
556 {
557   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
558   PetscErrorCode ierr;
559 
560   PetscFunctionBegin;
561 #if defined(PETSC_USE_LOG)
562   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
563 #endif
564   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
565   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
566   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
567   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
568 
569   ierr = PetscFree(mat->data);CHKERRQ(ierr);
570   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
573   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatView_MPIDense_Binary"
581 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
582 {
583   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
584   PetscErrorCode    ierr;
585   PetscViewerFormat format;
586   int               fd;
587   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
588   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
589   PetscScalar       *work,*v,*vv;
590   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
591 
592   PetscFunctionBegin;
593   if (mdn->size == 1) {
594     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
595   } else {
596     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
597     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
598     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
599 
600     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
601     if (format == PETSC_VIEWER_NATIVE) {
602 
603       if (!rank) {
604         /* store the matrix as a dense matrix */
605         header[0] = MAT_FILE_CLASSID;
606         header[1] = mat->rmap->N;
607         header[2] = N;
608         header[3] = MATRIX_BINARY_FORMAT_DENSE;
609         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
610 
611         /* get largest work array needed for transposing array */
612         mmax = mat->rmap->n;
613         for (i=1; i<size; i++) {
614           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
615         }
616         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
617 
618         /* write out local array, by rows */
619         m    = mat->rmap->n;
620         v    = a->v;
621         for (j=0; j<N; j++) {
622           for (i=0; i<m; i++) {
623             work[j + i*N] = *v++;
624           }
625         }
626         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
627         /* get largest work array to receive messages from other processes, excludes process zero */
628         mmax = 0;
629         for (i=1; i<size; i++) {
630           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
631         }
632         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
633         for (k = 1; k < size; k++) {
634           v    = vv;
635           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
636           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
637 
638           for (j = 0; j < N; j++) {
639             for (i = 0; i < m; i++) {
640               work[j + i*N] = *v++;
641             }
642           }
643           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
644         }
645         ierr = PetscFree(work);CHKERRQ(ierr);
646         ierr = PetscFree(vv);CHKERRQ(ierr);
647       } else {
648         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
649       }
650     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
651   }
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNCT__
656 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
657 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
658 {
659   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
660   PetscErrorCode    ierr;
661   PetscMPIInt       size = mdn->size,rank = mdn->rank;
662   PetscViewerType   vtype;
663   PetscBool         iascii,isdraw;
664   PetscViewer       sviewer;
665   PetscViewerFormat format;
666 
667   PetscFunctionBegin;
668   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
669   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
670   if (iascii) {
671     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
672     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
673     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
674       MatInfo info;
675       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
676       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
677       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
678       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
679       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
680       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
681       PetscFunctionReturn(0);
682     } else if (format == PETSC_VIEWER_ASCII_INFO) {
683       PetscFunctionReturn(0);
684     }
685   } else if (isdraw) {
686     PetscDraw  draw;
687     PetscBool  isnull;
688 
689     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
690     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
691     if (isnull) PetscFunctionReturn(0);
692   }
693 
694   if (size == 1) {
695     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
696   } else {
697     /* assemble the entire matrix onto first processor. */
698     Mat         A;
699     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
700     PetscInt    *cols;
701     PetscScalar *vals;
702 
703     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
704     if (!rank) {
705       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
706     } else {
707       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
708     }
709     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
710     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
711     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
712     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
713 
714     /* Copy the matrix ... This isn't the most efficient means,
715        but it's quick for now */
716     A->insertmode = INSERT_VALUES;
717     row = mat->rmap->rstart; m = mdn->A->rmap->n;
718     for (i=0; i<m; i++) {
719       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
720       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
721       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
722       row++;
723     }
724 
725     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
727     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
728     if (!rank) {
729       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
730       /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
731       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
732       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
733     }
734     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
736     ierr = MatDestroy(&A);CHKERRQ(ierr);
737   }
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIDense"
743 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
744 {
745   PetscErrorCode ierr;
746   PetscBool      iascii,isbinary,isdraw,issocket;
747 
748   PetscFunctionBegin;
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
753 
754   if (iascii || issocket || isdraw) {
755     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
756   } else if (isbinary) {
757     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatGetInfo_MPIDense"
764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
765 {
766   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
767   Mat            mdn = mat->A;
768   PetscErrorCode ierr;
769   PetscReal      isend[5],irecv[5];
770 
771   PetscFunctionBegin;
772   info->block_size     = 1.0;
773   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
774   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
775   isend[3] = info->memory;  isend[4] = info->mallocs;
776   if (flag == MAT_LOCAL) {
777     info->nz_used      = isend[0];
778     info->nz_allocated = isend[1];
779     info->nz_unneeded  = isend[2];
780     info->memory       = isend[3];
781     info->mallocs      = isend[4];
782   } else if (flag == MAT_GLOBAL_MAX) {
783     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
784     info->nz_used      = irecv[0];
785     info->nz_allocated = irecv[1];
786     info->nz_unneeded  = irecv[2];
787     info->memory       = irecv[3];
788     info->mallocs      = irecv[4];
789   } else if (flag == MAT_GLOBAL_SUM) {
790     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
791     info->nz_used      = irecv[0];
792     info->nz_allocated = irecv[1];
793     info->nz_unneeded  = irecv[2];
794     info->memory       = irecv[3];
795     info->mallocs      = irecv[4];
796   }
797   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
798   info->fill_ratio_needed = 0;
799   info->factor_mallocs    = 0;
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatSetOption_MPIDense"
805 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
806 {
807   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
808   PetscErrorCode ierr;
809 
810   PetscFunctionBegin;
811   switch (op) {
812   case MAT_NEW_NONZERO_LOCATIONS:
813   case MAT_NEW_NONZERO_LOCATION_ERR:
814   case MAT_NEW_NONZERO_ALLOCATION_ERR:
815     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
816     break;
817   case MAT_ROW_ORIENTED:
818     a->roworiented = flg;
819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
820     break;
821   case MAT_NEW_DIAGONALS:
822   case MAT_KEEP_NONZERO_PATTERN:
823   case MAT_USE_HASH_TABLE:
824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
825     break;
826   case MAT_IGNORE_OFF_PROC_ENTRIES:
827     a->donotstash = flg;
828     break;
829   case MAT_SYMMETRIC:
830   case MAT_STRUCTURALLY_SYMMETRIC:
831   case MAT_HERMITIAN:
832   case MAT_SYMMETRY_ETERNAL:
833   case MAT_IGNORE_LOWER_TRIANGULAR:
834     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
835     break;
836   default:
837     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
838   }
839   PetscFunctionReturn(0);
840 }
841 
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "MatDiagonalScale_MPIDense"
845 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
846 {
847   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
848   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
849   PetscScalar    *l,*r,x,*v;
850   PetscErrorCode ierr;
851   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
852 
853   PetscFunctionBegin;
854   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
855   if (ll) {
856     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
857     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
858     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
859     for (i=0; i<m; i++) {
860       x = l[i];
861       v = mat->v + i;
862       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
863     }
864     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
865     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
866   }
867   if (rr) {
868     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
869     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
870     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
873     for (i=0; i<n; i++) {
874       x = r[i];
875       v = mat->v + i*m;
876       for (j=0; j<m; j++) { (*v++) *= x;}
877     }
878     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
879     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
880   }
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatNorm_MPIDense"
886 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
887 {
888   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
889   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
890   PetscErrorCode ierr;
891   PetscInt       i,j;
892   PetscReal      sum = 0.0;
893   PetscScalar    *v = mat->v;
894 
895   PetscFunctionBegin;
896   if (mdn->size == 1) {
897     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
898   } else {
899     if (type == NORM_FROBENIUS) {
900       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
901         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
902       }
903       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
904       *nrm = PetscSqrtReal(*nrm);
905       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
906     } else if (type == NORM_1) {
907       PetscReal *tmp,*tmp2;
908       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
909       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
910       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
911       *nrm = 0.0;
912       v = mat->v;
913       for (j=0; j<mdn->A->cmap->n; j++) {
914         for (i=0; i<mdn->A->rmap->n; i++) {
915           tmp[j] += PetscAbsScalar(*v);  v++;
916         }
917       }
918       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
919       for (j=0; j<A->cmap->N; j++) {
920         if (tmp2[j] > *nrm) *nrm = tmp2[j];
921       }
922       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
923       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
924     } else if (type == NORM_INFINITY) { /* max row norm */
925       PetscReal ntemp;
926       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
927       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
928     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatTranspose_MPIDense"
935 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
936 {
937   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
938   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
939   Mat            B;
940   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
941   PetscErrorCode ierr;
942   PetscInt       j,i;
943   PetscScalar    *v;
944 
945   PetscFunctionBegin;
946   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
947   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
948     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
949     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
950     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
951     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
952   } else {
953     B = *matout;
954   }
955 
956   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
957   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
958   for (i=0; i<m; i++) rwork[i] = rstart + i;
959   for (j=0; j<n; j++) {
960     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
961     v   += m;
962   }
963   ierr = PetscFree(rwork);CHKERRQ(ierr);
964   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
967     *matout = B;
968   } else {
969     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
970   }
971   PetscFunctionReturn(0);
972 }
973 
974 
975 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
976 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatSetUp_MPIDense"
980 PetscErrorCode MatSetUp_MPIDense(Mat A)
981 {
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatAXPY_MPIDense"
991 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
992 {
993   PetscErrorCode ierr;
994   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
995 
996   PetscFunctionBegin;
997   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatConjugate_MPIDense"
1003 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatRealPart_MPIDense"
1015 PetscErrorCode MatRealPart_MPIDense(Mat A)
1016 {
1017   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1018   PetscErrorCode ierr;
1019 
1020   PetscFunctionBegin;
1021   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1027 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1028 {
1029   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1030   PetscErrorCode ierr;
1031 
1032   PetscFunctionBegin;
1033   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1040 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1041 {
1042   PetscErrorCode ierr;
1043   PetscInt       i,n;
1044   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1045   PetscReal      *work;
1046 
1047   PetscFunctionBegin;
1048   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
1049   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1050   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1051   if (type == NORM_2) {
1052     for (i=0; i<n; i++) work[i] *= work[i];
1053   }
1054   if (type == NORM_INFINITY) {
1055     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1056   } else {
1057     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1058   }
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (type == NORM_2) {
1061     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatSetRandom_MPIDense"
1068 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1069 {
1070   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1071   PetscErrorCode ierr;
1072   PetscScalar    *a;
1073   PetscInt       m,n,i;
1074 
1075   PetscFunctionBegin;
1076   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1077   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1078   for (i=0; i<m*n; i++) {
1079     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1080   }
1081   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 /* -------------------------------------------------------------------*/
1086 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1087                                         MatGetRow_MPIDense,
1088                                         MatRestoreRow_MPIDense,
1089                                         MatMult_MPIDense,
1090                                 /*  4*/ MatMultAdd_MPIDense,
1091                                         MatMultTranspose_MPIDense,
1092                                         MatMultTransposeAdd_MPIDense,
1093                                         0,
1094                                         0,
1095                                         0,
1096                                 /* 10*/ 0,
1097                                         0,
1098                                         0,
1099                                         0,
1100                                         MatTranspose_MPIDense,
1101                                 /* 15*/ MatGetInfo_MPIDense,
1102                                         MatEqual_MPIDense,
1103                                         MatGetDiagonal_MPIDense,
1104                                         MatDiagonalScale_MPIDense,
1105                                         MatNorm_MPIDense,
1106                                 /* 20*/ MatAssemblyBegin_MPIDense,
1107                                         MatAssemblyEnd_MPIDense,
1108                                         MatSetOption_MPIDense,
1109                                         MatZeroEntries_MPIDense,
1110                                 /* 24*/ MatZeroRows_MPIDense,
1111                                         0,
1112                                         0,
1113                                         0,
1114                                         0,
1115                                 /* 29*/ MatSetUp_MPIDense,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                 /* 34*/ MatDuplicate_MPIDense,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                         0,
1125                                 /* 39*/ MatAXPY_MPIDense,
1126                                         MatGetSubMatrices_MPIDense,
1127                                         0,
1128                                         MatGetValues_MPIDense,
1129                                         0,
1130                                 /* 44*/ 0,
1131                                         MatScale_MPIDense,
1132                                         0,
1133                                         0,
1134                                         0,
1135                                 /* 49*/ MatSetRandom_MPIDense,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                 /* 54*/ 0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                 /* 59*/ MatGetSubMatrix_MPIDense,
1146                                         MatDestroy_MPIDense,
1147                                         MatView_MPIDense,
1148                                         0,
1149                                         0,
1150                                 /* 64*/ 0,
1151                                         0,
1152                                         0,
1153                                         0,
1154                                         0,
1155                                 /* 69*/ 0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                         0,
1160                                 /* 74*/ 0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                 /* 79*/ 0,
1166                                         0,
1167                                         0,
1168                                         0,
1169                                 /* 83*/ MatLoad_MPIDense,
1170                                         0,
1171                                         0,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                 /* 89*/
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                         0,
1181                                 /* 94*/ 0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                         0,
1186                                 /* 99*/ 0,
1187                                         0,
1188                                         0,
1189                                         MatConjugate_MPIDense,
1190                                         0,
1191                                 /*104*/ 0,
1192                                         MatRealPart_MPIDense,
1193                                         MatImaginaryPart_MPIDense,
1194                                         0,
1195                                         0,
1196                                 /*109*/ 0,
1197                                         0,
1198                                         0,
1199                                         0,
1200                                         0,
1201                                 /*114*/ 0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                         0,
1206                                 /*119*/ 0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                         0,
1211                                 /*124*/ 0,
1212                                         MatGetColumnNorms_MPIDense
1213 };
1214 
1215 EXTERN_C_BEGIN
1216 #undef __FUNCT__
1217 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1218 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1219 {
1220   Mat_MPIDense   *a;
1221   PetscErrorCode ierr;
1222 
1223   PetscFunctionBegin;
1224   mat->preallocated = PETSC_TRUE;
1225   /* Note:  For now, when data is provided, this assumes the user has correctly allocated
1226    the local dense storage (column-major, mat->rmap->n by mat->cmap->N).  We should add error checking. */
1227 
1228   a    = (Mat_MPIDense*)mat->data;
1229   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1230   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1231   a->nvec = mat->cmap->n;
1232 
1233   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1234   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1235   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1236   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1237   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 EXTERN_C_END
1241 
1242 EXTERN_C_BEGIN
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatCreate_MPIDense"
1245 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1246 {
1247   Mat_MPIDense   *a;
1248   PetscErrorCode ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1252   mat->data         = (void*)a;
1253   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1254 
1255   mat->insertmode = NOT_SET_VALUES;
1256   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1257   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1258 
1259   /* build cache for off-processor entries generated during MatSetValues() */
1260   a->donotstash = PETSC_FALSE;
1261   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1262 
1263   /* stuff used for matrix vector multiply */
1264   a->lvec        = 0;
1265   a->Mvctx       = 0;
1266   a->roworiented = PETSC_TRUE;
1267 
1268   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1272                                            "MatGetDiagonalBlock_MPIDense",
1273                                             MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1275                                            "MatMPIDenseSetPreallocation_MPIDense",
1276                                             MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1278                                            "MatMatMult_MPIAIJ_MPIDense",
1279                                             MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1281                                             "MatMatMultSymbolic_MPIAIJ_MPIDense",
1282                                              MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1284                                            "MatMatMultNumeric_MPIAIJ_MPIDense",
1285                                             MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1286   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1287 
1288   PetscFunctionReturn(0);
1289 }
1290 EXTERN_C_END
1291 
1292 /*MC
1293    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1294 
1295    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1296    and MATMPIDENSE otherwise.
1297 
1298    Options Database Keys:
1299 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1300 
1301   Level: beginner
1302 
1303 
1304 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1305 M*/
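
/*
   A minimal sketch of creating a "dense" matrix; the communicator and the global sizes M and N
   are placeholders.  With more than one process the type resolves to MATMPIDENSE, otherwise to
   MATSEQDENSE:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/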
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1309 /*@C
1310    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1311 
1312    Not collective
1313 
1314    Input Parameters:
1315 +  A - the matrix
1316 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1317    to control all matrix memory allocation.
1318 
1319    Notes:
1320    The dense format is fully compatible with standard Fortran 77
1321    storage by columns.
1322 
1323    The data input variable is intended primarily for Fortran programmers
1324    who wish to allocate their own matrix memory space.  Most users should
1325    set data=PETSC_NULL.
1326 
1327    Level: intermediate
1328 
1329 .keywords: matrix,dense, parallel
1330 
1331 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1332 @*/
1333 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1334 {
1335   PetscErrorCode ierr;
1336 
1337   PetscFunctionBegin;
1338   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
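
/*
   A minimal sketch, assuming mat has already been created with type MATMPIDENSE and its sizes
   set.  Passing PETSC_NULL lets PETSc allocate the local storage; alternatively a caller-owned
   array (mydata below is purely illustrative) of mat->rmap->n * mat->cmap->N scalars in
   column-major order may be supplied:

     ierr = MatMPIDenseSetPreallocation(mat,PETSC_NULL);CHKERRQ(ierr);
   or, with user-provided storage:
     ierr = MatMPIDenseSetPreallocation(mat,mydata);CHKERRQ(ierr);
*/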
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatCreateDense"
1344 /*@C
1345    MatCreateDense - Creates a parallel matrix in dense format.
1346 
1347    Collective on MPI_Comm
1348 
1349    Input Parameters:
1350 +  comm - MPI communicator
1351 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1352 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1353 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1354 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1355 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1356    to control all matrix memory allocation.
1357 
1358    Output Parameter:
1359 .  A - the matrix
1360 
1361    Notes:
1362    The dense format is fully compatible with standard Fortran 77
1363    storage by columns.
1364 
1365    The data input variable is intended primarily for Fortran programmers
1366    who wish to allocate their own matrix memory space.  Most users should
1367    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1368 
1369    The user MUST specify either the local or global matrix dimensions
1370    (possibly both).
1371 
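   For example, a minimal sketch (illustrative only) that creates a 100 by 50 dense matrix, letting
   PETSc choose the local sizes and allocate the storage:
.vb
   Mat            A;
   PetscErrorCode ierr;
   ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... fill with MatSetValues() and assemble ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
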
1372    Level: intermediate
1373 
1374 .keywords: matrix,dense, parallel
1375 
1376 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1377 @*/
1378 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1379 {
1380   PetscErrorCode ierr;
1381   PetscMPIInt    size;
1382 
1383   PetscFunctionBegin;
1384   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1385   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1386   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1387   if (size > 1) {
1388     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1389     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1390   } else {
1391     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1392     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1393   }
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatDuplicate_MPIDense"
1399 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1400 {
1401   Mat            mat;
1402   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1403   PetscErrorCode ierr;
1404 
1405   PetscFunctionBegin;
1406   *newmat       = 0;
1407   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1408   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1409   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1410   a                 = (Mat_MPIDense*)mat->data;
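  /* copy A's operation table onto the duplicate so it provides the same operations */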
1411   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1412 
1413   mat->factortype   = A->factortype;
1414   mat->assembled    = PETSC_TRUE;
1415   mat->preallocated = PETSC_TRUE;
1416 
1417   a->size           = oldmat->size;
1418   a->rank           = oldmat->rank;
1419   mat->insertmode   = NOT_SET_VALUES;
1420   a->nvec           = oldmat->nvec;
1421   a->donotstash     = oldmat->donotstash;
1422 
1423   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1424   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1425 
1426   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1427   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1428   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1429 
1430   *newmat = mat;
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1436 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1437 {
1438   PetscErrorCode ierr;
1439   PetscMPIInt    rank,size;
1440   PetscInt       *rowners,i,m,nz,j;
1441   PetscScalar    *array,*vals,*vals_ptr;
1442 
1443   PetscFunctionBegin;
1444   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1445   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1446 
1447   /* determine ownership of all rows */
1448   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1449   else m = newmat->rmap->n;
1450   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1451   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
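  /* convert the gathered per-process row counts into a prefix sum: rowners[p] is the first global row owned by process p */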
1452   rowners[0] = 0;
1453   for (i=2; i<=size; i++) {
1454     rowners[i] += rowners[i-1];
1455   }
1456 
1457   if (!sizesset) {
1458     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1459   }
1460   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1461   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
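  /* array is the local m by N block, stored column major (Fortran order) */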
1462 
1463   if (!rank) {
1464     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1465 
1466     /* read in my part of the matrix numerical values  */
1467     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1468 
1469     /* insert into the matrix row by row (the file stores the values row by row while the local dense array is column major, so we cannot read the file directly into the array) */
1470     vals_ptr = vals;
1471     for (i=0; i<m; i++) {
1472       for (j=0; j<N; j++) {
1473         array[i + j*m] = *vals_ptr++;
1474       }
1475     }
1476 
1477     /* read in other processors and ship out */
1478     for (i=1; i<size; i++) {
1479       nz   = (rowners[i+1] - rowners[i])*N;
1480       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1481       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1482     }
1483   } else {
1484     /* receive numeric values */
1485     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1486 
1487     /* receive the message of values */
1488     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1489 
1490     /* insert into the matrix row by row (the received values are ordered row by row while the local dense array is column major, so we cannot receive directly into the array) */
1491     vals_ptr = vals;
1492     for (i=0; i<m; i++) {
1493       for (j=0; j<N; j++) {
1494         array[i + j*m] = *vals_ptr++;
1495       }
1496     }
1497   }
1498   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1499   ierr = PetscFree(rowners);CHKERRQ(ierr);
1500   ierr = PetscFree(vals);CHKERRQ(ierr);
1501   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1502   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatLoad_MPIDense"
1508 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1509 {
1510   PetscScalar    *vals,*svals;
1511   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1512   MPI_Status     status;
1513   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1514   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1515   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1516   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1517   int            fd;
1518   PetscErrorCode ierr;
1519 
1520   PetscFunctionBegin;
1521   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1522   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1523   if (!rank) {
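    /* the binary header is [classid, M (global rows), N (global cols), nz]; nz is the total number of
       nonzeros, or MATRIX_BINARY_FORMAT_DENSE if the values are stored as a dense matrix */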
1524     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1525     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1526     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1527   }
1528   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1529 
1530   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1531   M = header[1]; N = header[2]; nz = header[3];
1532 
1533   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1534   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1535   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1536 
1537   /* If the global sizes are set, check that they are consistent with those given in the file */
1538   if (sizesset) {
1539     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1540   }
1541   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
1542   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
1543 
1544   /*
1545        Handle case where matrix is stored on disk as a dense matrix
1546   */
1547   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1548     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1549     PetscFunctionReturn(0);
1550   }
1551 
1552   /* determine ownership of all rows */
1553   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
1554   else m = PetscMPIIntCast(newmat->rmap->n);
1555   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1556   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
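  /* turn the gathered row counts into a prefix sum so that rowners[p] is the first row owned by process p */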
1557   rowners[0] = 0;
1558   for (i=2; i<=size; i++) {
1559     rowners[i] += rowners[i-1];
1560   }
1561   rstart = rowners[rank];
1562   rend   = rowners[rank+1];
1563 
1564   /* distribute row lengths to all processors */
1565   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1566   if (!rank) {
1567     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1568     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1569     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1570     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1571     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1572     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1573   } else {
1574     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1575   }
1576 
1577   if (!rank) {
1578     /* calculate the number of nonzeros on each processor */
1579     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1580     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1581     for (i=0; i<size; i++) {
1582       for (j=rowners[i]; j< rowners[i+1]; j++) {
1583         procsnz[i] += rowlengths[j];
1584       }
1585     }
1586     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1587 
1588     /* determine max buffer needed and allocate it */
1589     maxnz = 0;
1590     for (i=0; i<size; i++) {
1591       maxnz = PetscMax(maxnz,procsnz[i]);
1592     }
1593     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1594 
1595     /* read in my part of the matrix column indices  */
1596     nz = procsnz[0];
1597     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1598     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1599 
1600     /* read in everyone else's part and ship it off */
1601     for (i=1; i<size; i++) {
1602       nz   = procsnz[i];
1603       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1604       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1605     }
1606     ierr = PetscFree(cols);CHKERRQ(ierr);
1607   } else {
1608     /* determine buffer space needed for message */
1609     nz = 0;
1610     for (i=0; i<m; i++) {
1611       nz += ourlens[i];
1612     }
1613     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1614 
1615     /* receive the message of column indices */
1616     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1617     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1618     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1619   }
1620 
1621   /* loop over the local rows, determining the number of off-diagonal entries */
1622   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1623   jj = 0;
1624   for (i=0; i<m; i++) {
1625     for (j=0; j<ourlens[i]; j++) {
1626       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1627       jj++;
1628     }
1629   }
1630 
1631   /* create our matrix */
1632   for (i=0; i<m; i++) {
1633     ourlens[i] -= offlens[i];
1634   }
1635 
1636   if (!sizesset) {
1637     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1638   }
1639   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1640   for (i=0; i<m; i++) {
1641     ourlens[i] += offlens[i];
1642   }
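  /* ourlens[] once again holds the full row lengths; below they are used to step through the column index and value buffers row by row */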
1643 
1644   if (!rank) {
1645     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1646 
1647     /* read in my part of the matrix numerical values  */
1648     nz = procsnz[0];
1649     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1650 
1651     /* insert into matrix */
1652     jj      = rstart;
1653     smycols = mycols;
1654     svals   = vals;
1655     for (i=0; i<m; i++) {
1656       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1657       smycols += ourlens[i];
1658       svals   += ourlens[i];
1659       jj++;
1660     }
1661 
1662     /* read in other processors and ship out */
1663     for (i=1; i<size; i++) {
1664       nz   = procsnz[i];
1665       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1666       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1667     }
1668     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1669   } else {
1670     /* receive numeric values */
1671     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1672 
1673     /* receive the message of values */
1674     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1675     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1676     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1677 
1678     /* insert into matrix */
1679     jj      = rstart;
1680     smycols = mycols;
1681     svals   = vals;
1682     for (i=0; i<m; i++) {
1683       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1684       smycols += ourlens[i];
1685       svals   += ourlens[i];
1686       jj++;
1687     }
1688   }
1689   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1690   ierr = PetscFree(vals);CHKERRQ(ierr);
1691   ierr = PetscFree(mycols);CHKERRQ(ierr);
1692   ierr = PetscFree(rowners);CHKERRQ(ierr);
1693 
1694   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatEqual_MPIDense"
1701 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1702 {
1703   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1704   Mat            a,b;
1705   PetscBool      flg;
1706   PetscErrorCode ierr;
1707 
1708   PetscFunctionBegin;
1709   a = matA->A;
1710   b = matB->A;
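  /* compare the local (sequential) blocks, then AND the result across all processes */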
1711   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1712   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1713   PetscFunctionReturn(0);
1714 }
1715 
1716