xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 009bbdc485cd9ad46be9940d3549e2dde9cdc322)
1 
2 /*
3    Basic functions for MPI parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14               matrix that stores the locally owned part of the operator. For sequential matrices it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
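    Example usage (a minimal sketch, not from the manual page; error handling shortened, and A is assumed to be an already assembled MATMPIDENSE matrix):
.vb
      Mat      Aloc;
      PetscInt m,n;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);  /* for MATMPIDENSE: locally owned rows by global columns */
.ve
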
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) {
34     *B = mat->A;
35   } else {
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 #undef __FUNCT__
42 #define __FUNCT__ "MatGetRow_MPIDense"
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "MatRestoreRow_MPIDense"
58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
59 {
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
64   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
71 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
72 {
73   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
74   PetscErrorCode ierr;
75   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
76   PetscScalar    *array;
77   MPI_Comm       comm;
78   Mat            B;
79 
80   PetscFunctionBegin;
81   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
82 
83   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
84   if (!B) {
85     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
86     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
87     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
88     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
89     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
90     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
91     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
92     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
93     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
94     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
95     *a = B;
96     ierr = MatDestroy(&B);CHKERRQ(ierr);
97   } else {
98     *a = B;
99   }
100   PetscFunctionReturn(0);
101 }
102 EXTERN_C_END
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatSetValues_MPIDense"
106 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
107 {
108   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
109   PetscErrorCode ierr;
110   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
111   PetscBool      roworiented = A->roworiented;
112 
113   PetscFunctionBegin;
114   if (v) PetscValidScalarPointer(v,6);
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         mat->assembled = PETSC_FALSE;
132         if (roworiented) {
133           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
134         } else {
135           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
136         }
137       }
138     }
139   }
140   PetscFunctionReturn(0);
141 }
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatGetValues_MPIDense"
145 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
146 {
147   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
148   PetscErrorCode ierr;
149   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
150 
151   PetscFunctionBegin;
152   for (i=0; i<m; i++) {
153     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
154     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
155     if (idxm[i] >= rstart && idxm[i] < rend) {
156       row = idxm[i] - rstart;
157       for (j=0; j<n; j++) {
158         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
159         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
161       }
162     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatDenseGetArray_MPIDense"
169 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
170 {
171   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
172   PetscErrorCode ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 #undef __FUNCT__
180 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
181 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
182 {
183   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
184   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
185   PetscErrorCode ierr;
186   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
187   const PetscInt *irow,*icol;
188   PetscScalar    *av,*bv,*v = lmat->v;
189   Mat            newmat;
190   IS             iscol_local;
191 
192   PetscFunctionBegin;
193   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
199 
200   /* No parallel redistribution currently supported! Should really check each index set
201      to confirm that it is OK.  ... Currently supports only submatrices with the same row partitioning as the
202      original matrix! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test row and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<Ncols; i++) {
225     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
238   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
239   *B = newmat;
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
245 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
246 {
247   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
248   PetscErrorCode ierr;
249 
250   PetscFunctionBegin;
251   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
257 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
260   MPI_Comm       comm = ((PetscObject)mat)->comm;
261   PetscErrorCode ierr;
262   PetscInt       nstash,reallocs;
263   InsertMode     addv;
264 
265   PetscFunctionBegin;
266   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
267   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
268   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
269   mat->insertmode = addv; /* in case this processor had no cache */
270 
271   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
272   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
273   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
279 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
280 {
281   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
282   PetscErrorCode  ierr;
283   PetscInt        i,*row,*col,flg,j,rstart,ncols;
284   PetscMPIInt     n;
285   PetscScalar     *val;
286   InsertMode      addv=mat->insertmode;
287 
288   PetscFunctionBegin;
289   /*  wait on receives */
290   while (1) {
291     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
292     if (!flg) break;
293 
294     for (i=0; i<n;) {
295       /* Now identify the consecutive vals belonging to the same row */
296       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
297       if (j < n) ncols = j-i;
298       else       ncols = n-i;
299       /* Now assemble all these values with a single function call */
300       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
301       i = j;
302     }
303   }
304   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
305 
306   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
307   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
308 
309   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
310     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatZeroEntries_MPIDense"
317 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
318 {
319   PetscErrorCode ierr;
320   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
321 
322   PetscFunctionBegin;
323   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 /* the code does not do the diagonal entries correctly unless the
328    matrix is square and the column and row ownerships are identical.
329    This is a BUG. The only way to fix it seems to be to access
330    the local matrix mdn->A directly and not through the MatZeroRows()
331    routine.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatZeroRows_MPIDense"
335 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
336 {
337   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
338   PetscErrorCode    ierr;
339   PetscInt          i,*owners = A->rmap->range;
340   PetscInt          *nprocs,j,idx,nsends;
341   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
342   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
343   PetscInt          *lens,*lrows,*values;
344   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
345   MPI_Comm          comm = ((PetscObject)A)->comm;
346   MPI_Request       *send_waits,*recv_waits;
347   MPI_Status        recv_status,*send_status;
348   PetscBool         found;
349   const PetscScalar *xx;
350   PetscScalar       *bb;
351 
352   PetscFunctionBegin;
353   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
354   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
355   /*  first count number of contributors to each processor */
356   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
357   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
358   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
359   for (i=0; i<N; i++) {
360     idx = rows[i];
361     found = PETSC_FALSE;
362     for (j=0; j<size; j++) {
363       if (idx >= owners[j] && idx < owners[j+1]) {
364         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
365       }
366     }
367     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
368   }
369   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
370 
371   /* inform other processors of number of messages and max length*/
372   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
373 
374   /* post receives:   */
375   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
376   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
377   for (i=0; i<nrecvs; i++) {
378     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
379   }
380 
381   /* do sends:
382       1) starts[i] gives the starting index in svalues for stuff going to
383          the ith processor
384   */
385   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
386   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
387   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
388   starts[0]  = 0;
389   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
390   for (i=0; i<N; i++) {
391     svalues[starts[owner[i]]++] = rows[i];
392   }
393 
394   starts[0] = 0;
395   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
396   count = 0;
397   for (i=0; i<size; i++) {
398     if (nprocs[2*i+1]) {
399       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
400     }
401   }
402   ierr = PetscFree(starts);CHKERRQ(ierr);
403 
404   base = owners[rank];
405 
406   /*  wait on receives */
407   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
408   count  = nrecvs;
409   slen   = 0;
410   while (count) {
411     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
412     /* unpack receives into our local space */
413     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
414     source[imdex]  = recv_status.MPI_SOURCE;
415     lens[imdex]    = n;
416     slen += n;
417     count--;
418   }
419   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
420 
421   /* move the data into the send scatter */
422   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
423   count = 0;
424   for (i=0; i<nrecvs; i++) {
425     values = rvalues + i*nmax;
426     for (j=0; j<lens[i]; j++) {
427       lrows[count++] = values[j] - base;
428     }
429   }
430   ierr = PetscFree(rvalues);CHKERRQ(ierr);
431   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
432   ierr = PetscFree(owner);CHKERRQ(ierr);
433   ierr = PetscFree(nprocs);CHKERRQ(ierr);
434 
435   /* fix right hand side if needed */
436   if (x && b) {
437     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
439     for (i=0; i<slen; i++) {
440       bb[lrows[i]] = diag*xx[lrows[i]];
441     }
442     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
443     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
444   }
445 
446   /* actually zap the local rows */
447   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
448   if (diag != 0.0) {
449     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
450     PetscInt      m = ll->lda, i;
451 
452     for (i=0; i<slen; i++) {
453       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
454     }
455   }
456   ierr = PetscFree(lrows);CHKERRQ(ierr);
457 
458   /* wait on sends */
459   if (nsends) {
460     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
461     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
462     ierr = PetscFree(send_status);CHKERRQ(ierr);
463   }
464   ierr = PetscFree(send_waits);CHKERRQ(ierr);
465   ierr = PetscFree(svalues);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatMult_MPIDense"
471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
472 {
473   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
477   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatMultAdd_MPIDense"
485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
488   PetscErrorCode ierr;
489 
490   PetscFunctionBegin;
491   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatMultTranspose_MPIDense"
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502   PetscErrorCode ierr;
503   PetscScalar    zero = 0.0;
504 
505   PetscFunctionBegin;
506   ierr = VecSet(yy,zero);CHKERRQ(ierr);
507   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
508   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
516 {
517   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
518   PetscErrorCode ierr;
519 
520   PetscFunctionBegin;
521   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
522   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
523   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNCT__
529 #define __FUNCT__ "MatGetDiagonal_MPIDense"
530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
531 {
532   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
533   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
534   PetscErrorCode ierr;
535   PetscInt       len,i,n,m = A->rmap->n,radd;
536   PetscScalar    *x,zero = 0.0;
537 
538   PetscFunctionBegin;
539   ierr = VecSet(v,zero);CHKERRQ(ierr);
540   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
541   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
542   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
543   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
544   radd = A->rmap->rstart*m;
545   for (i=0; i<len; i++) {
546     x[i] = aloc->v[radd + i*m + i];
547   }
548   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatDestroy_MPIDense"
554 PetscErrorCode MatDestroy_MPIDense(Mat mat)
555 {
556   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
557   PetscErrorCode ierr;
558 
559   PetscFunctionBegin;
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
565   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
566   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
567 
568   ierr = PetscFree(mat->data);CHKERRQ(ierr);
569   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
570   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
573   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatView_MPIDense_Binary"
580 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
581 {
582   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
583   PetscErrorCode    ierr;
584   PetscViewerFormat format;
585   int               fd;
586   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
587   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
588   PetscScalar       *work,*v,*vv;
589   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
590 
591   PetscFunctionBegin;
592   if (mdn->size == 1) {
593     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
594   } else {
595     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
596     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
597     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
598 
599     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
600     if (format == PETSC_VIEWER_NATIVE) {
601 
602       if (!rank) {
603         /* store the matrix as a dense matrix */
604         header[0] = MAT_FILE_CLASSID;
605         header[1] = mat->rmap->N;
606         header[2] = N;
607         header[3] = MATRIX_BINARY_FORMAT_DENSE;
608         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
609 
610         /* get largest work array needed for transposing array */
611         mmax = mat->rmap->n;
612         for (i=1; i<size; i++) {
613           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
614         }
615         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
616 
617         /* write out local array, by rows */
618         m    = mat->rmap->n;
619         v    = a->v;
620         for (j=0; j<N; j++) {
621           for (i=0; i<m; i++) {
622             work[j + i*N] = *v++;
623           }
624         }
625         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
626         /* get largest work array to receive messages from other processes, excludes process zero */
627         mmax = 0;
628         for (i=1; i<size; i++) {
629           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
630         }
631         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
632         for (k = 1; k < size; k++) {
633           v    = vv;
634           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
635           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
636 
637           for (j = 0; j < N; j++) {
638             for (i = 0; i < m; i++) {
639               work[j + i*N] = *v++;
640             }
641           }
642           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
643         }
644         ierr = PetscFree(work);CHKERRQ(ierr);
645         ierr = PetscFree(vv);CHKERRQ(ierr);
646       } else {
647         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
648       }
649     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
650   }
651   PetscFunctionReturn(0);
652 }
653 
654 #undef __FUNCT__
655 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
656 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
657 {
658   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
659   PetscErrorCode    ierr;
660   PetscMPIInt       size = mdn->size,rank = mdn->rank;
661   PetscViewerType   vtype;
662   PetscBool         iascii,isdraw;
663   PetscViewer       sviewer;
664   PetscViewerFormat format;
665 
666   PetscFunctionBegin;
667   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
668   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
669   if (iascii) {
670     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
671     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
672     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
673       MatInfo info;
674       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
675       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
676       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
677       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
678       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
679       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
680       PetscFunctionReturn(0);
681     } else if (format == PETSC_VIEWER_ASCII_INFO) {
682       PetscFunctionReturn(0);
683     }
684   } else if (isdraw) {
685     PetscDraw  draw;
686     PetscBool  isnull;
687 
688     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
689     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
690     if (isnull) PetscFunctionReturn(0);
691   }
692 
693   if (size == 1) {
694     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
695   } else {
696     /* assemble the entire matrix onto first processor. */
697     Mat         A;
698     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
699     PetscInt    *cols;
700     PetscScalar *vals;
701 
702     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
703     if (!rank) {
704       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
705     } else {
706       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
707     }
708     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
709     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
710     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
711     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
712 
713     /* Copy the matrix ... This isn't the most efficient means,
714        but it's quick for now */
715     A->insertmode = INSERT_VALUES;
716     row = mat->rmap->rstart; m = mdn->A->rmap->n;
717     for (i=0; i<m; i++) {
718       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
719       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
720       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
721       row++;
722     }
723 
724     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
725     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
727     if (!rank) {
728       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
729       /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
730       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
731       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
732     }
733     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
734     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
735     ierr = MatDestroy(&A);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIDense"
742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
743 {
744   PetscErrorCode ierr;
745   PetscBool      iascii,isbinary,isdraw,issocket;
746 
747   PetscFunctionBegin;
748   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
752 
753   if (iascii || issocket || isdraw) {
754     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
755   } else if (isbinary) {
756     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
757   }
758   PetscFunctionReturn(0);
759 }
760 
761 #undef __FUNCT__
762 #define __FUNCT__ "MatGetInfo_MPIDense"
763 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
764 {
765   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
766   Mat            mdn = mat->A;
767   PetscErrorCode ierr;
768   PetscReal      isend[5],irecv[5];
769 
770   PetscFunctionBegin;
771   info->block_size     = 1.0;
772   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
773   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
774   isend[3] = info->memory;  isend[4] = info->mallocs;
775   if (flag == MAT_LOCAL) {
776     info->nz_used      = isend[0];
777     info->nz_allocated = isend[1];
778     info->nz_unneeded  = isend[2];
779     info->memory       = isend[3];
780     info->mallocs      = isend[4];
781   } else if (flag == MAT_GLOBAL_MAX) {
782     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
783     info->nz_used      = irecv[0];
784     info->nz_allocated = irecv[1];
785     info->nz_unneeded  = irecv[2];
786     info->memory       = irecv[3];
787     info->mallocs      = irecv[4];
788   } else if (flag == MAT_GLOBAL_SUM) {
789     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
790     info->nz_used      = irecv[0];
791     info->nz_allocated = irecv[1];
792     info->nz_unneeded  = irecv[2];
793     info->memory       = irecv[3];
794     info->mallocs      = irecv[4];
795   }
796   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
797   info->fill_ratio_needed = 0;
798   info->factor_mallocs    = 0;
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatSetOption_MPIDense"
804 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
805 {
806   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
807   PetscErrorCode ierr;
808 
809   PetscFunctionBegin;
810   switch (op) {
811   case MAT_NEW_NONZERO_LOCATIONS:
812   case MAT_NEW_NONZERO_LOCATION_ERR:
813   case MAT_NEW_NONZERO_ALLOCATION_ERR:
814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
815     break;
816   case MAT_ROW_ORIENTED:
817     a->roworiented = flg;
818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
819     break;
820   case MAT_NEW_DIAGONALS:
821   case MAT_KEEP_NONZERO_PATTERN:
822   case MAT_USE_HASH_TABLE:
823     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
824     break;
825   case MAT_IGNORE_OFF_PROC_ENTRIES:
826     a->donotstash = flg;
827     break;
828   case MAT_SYMMETRIC:
829   case MAT_STRUCTURALLY_SYMMETRIC:
830   case MAT_HERMITIAN:
831   case MAT_SYMMETRY_ETERNAL:
832   case MAT_IGNORE_LOWER_TRIANGULAR:
833     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
834     break;
835   default:
836     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatDiagonalScale_MPIDense"
844 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
845 {
846   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
847   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
848   PetscScalar    *l,*r,x,*v;
849   PetscErrorCode ierr;
850   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
851 
852   PetscFunctionBegin;
853   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
854   if (ll) {
855     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
856     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
857     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
858     for (i=0; i<m; i++) {
859       x = l[i];
860       v = mat->v + i;
861       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
862     }
863     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
864     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
865   }
866   if (rr) {
867     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
868     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
869     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
872     for (i=0; i<n; i++) {
873       x = r[i];
874       v = mat->v + i*m;
875       for (j=0; j<m; j++) { (*v++) *= x;}
876     }
877     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
878     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
879   }
880   PetscFunctionReturn(0);
881 }
882 
883 #undef __FUNCT__
884 #define __FUNCT__ "MatNorm_MPIDense"
885 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
886 {
887   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
888   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
889   PetscErrorCode ierr;
890   PetscInt       i,j;
891   PetscReal      sum = 0.0;
892   PetscScalar    *v = mat->v;
893 
894   PetscFunctionBegin;
895   if (mdn->size == 1) {
896     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
897   } else {
898     if (type == NORM_FROBENIUS) {
899       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
900         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
901       }
902       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
903       *nrm = PetscSqrtReal(*nrm);
904       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
905     } else if (type == NORM_1) {
906       PetscReal *tmp,*tmp2;
907       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
908       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
909       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
910       *nrm = 0.0;
911       v = mat->v;
912       for (j=0; j<mdn->A->cmap->n; j++) {
913         for (i=0; i<mdn->A->rmap->n; i++) {
914           tmp[j] += PetscAbsScalar(*v);  v++;
915         }
916       }
917       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
918       for (j=0; j<A->cmap->N; j++) {
919         if (tmp2[j] > *nrm) *nrm = tmp2[j];
920       }
921       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
922       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
923     } else if (type == NORM_INFINITY) { /* max row norm */
924       PetscReal ntemp;
925       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
926       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
927     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatTranspose_MPIDense"
934 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
935 {
936   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
937   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
938   Mat            B;
939   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
940   PetscErrorCode ierr;
941   PetscInt       j,i;
942   PetscScalar    *v;
943 
944   PetscFunctionBegin;
945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
946   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
947     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
948     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
949     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
950     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
951   } else {
952     B = *matout;
953   }
954 
955   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
956   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
957   for (i=0; i<m; i++) rwork[i] = rstart + i;
958   for (j=0; j<n; j++) {
959     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
960     v   += m;
961   }
962   ierr = PetscFree(rwork);CHKERRQ(ierr);
963   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
964   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
966     *matout = B;
967   } else {
968     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
969   }
970   PetscFunctionReturn(0);
971 }
972 
973 
974 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
975 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatSetUp_MPIDense"
979 PetscErrorCode MatSetUp_MPIDense(Mat A)
980 {
981   PetscErrorCode ierr;
982 
983   PetscFunctionBegin;
984   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
985   PetscFunctionReturn(0);
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "MatAXPY_MPIDense"
990 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
991 {
992   PetscErrorCode ierr;
993   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
994 
995   PetscFunctionBegin;
996   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatConjugate_MPIDense"
1002 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1003 {
1004   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatRealPart_MPIDense"
1014 PetscErrorCode MatRealPart_MPIDense(Mat A)
1015 {
1016   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1026 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1027 {
1028   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1029   PetscErrorCode ierr;
1030 
1031   PetscFunctionBegin;
1032   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1039 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1040 {
1041   PetscErrorCode ierr;
1042   PetscInt       i,n;
1043   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1044   PetscReal      *work;
1045 
1046   PetscFunctionBegin;
1047   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
1048   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1049   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1050   if (type == NORM_2) {
1051     for (i=0; i<n; i++) work[i] *= work[i];
1052   }
1053   if (type == NORM_INFINITY) {
1054     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1055   } else {
1056     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1057   }
1058   ierr = PetscFree(work);CHKERRQ(ierr);
1059   if (type == NORM_2) {
1060     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatSetRandom_MPIDense"
1067 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1068 {
1069   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1070   PetscErrorCode ierr;
1071   PetscScalar    *a;
1072   PetscInt       m,n,i;
1073 
1074   PetscFunctionBegin;
1075   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1076   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1077   for (i=0; i<m*n; i++) {
1078     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1079   }
1080   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 /* -------------------------------------------------------------------*/
1085 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1086                                         MatGetRow_MPIDense,
1087                                         MatRestoreRow_MPIDense,
1088                                         MatMult_MPIDense,
1089                                 /*  4*/ MatMultAdd_MPIDense,
1090                                         MatMultTranspose_MPIDense,
1091                                         MatMultTransposeAdd_MPIDense,
1092                                         0,
1093                                         0,
1094                                         0,
1095                                 /* 10*/ 0,
1096                                         0,
1097                                         0,
1098                                         0,
1099                                         MatTranspose_MPIDense,
1100                                 /* 15*/ MatGetInfo_MPIDense,
1101                                         MatEqual_MPIDense,
1102                                         MatGetDiagonal_MPIDense,
1103                                         MatDiagonalScale_MPIDense,
1104                                         MatNorm_MPIDense,
1105                                 /* 20*/ MatAssemblyBegin_MPIDense,
1106                                         MatAssemblyEnd_MPIDense,
1107                                         MatSetOption_MPIDense,
1108                                         MatZeroEntries_MPIDense,
1109                                 /* 24*/ MatZeroRows_MPIDense,
1110                                         0,
1111                                         0,
1112                                         0,
1113                                         0,
1114                                 /* 29*/ MatSetUp_MPIDense,
1115                                         0,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                 /* 34*/ MatDuplicate_MPIDense,
1120                                         0,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                 /* 39*/ MatAXPY_MPIDense,
1125                                         MatGetSubMatrices_MPIDense,
1126                                         0,
1127                                         MatGetValues_MPIDense,
1128                                         0,
1129                                 /* 44*/ 0,
1130                                         MatScale_MPIDense,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                 /* 49*/ MatSetRandom_MPIDense,
1135                                         0,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                 /* 54*/ 0,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                 /* 59*/ MatGetSubMatrix_MPIDense,
1145                                         MatDestroy_MPIDense,
1146                                         MatView_MPIDense,
1147                                         0,
1148                                         0,
1149                                 /* 64*/ 0,
1150                                         0,
1151                                         0,
1152                                         0,
1153                                         0,
1154                                 /* 69*/ 0,
1155                                         0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                 /* 74*/ 0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                 /* 79*/ 0,
1165                                         0,
1166                                         0,
1167                                         0,
1168                                 /* 83*/ MatLoad_MPIDense,
1169                                         0,
1170                                         0,
1171                                         0,
1172                                         0,
1173                                         0,
1174                                 /* 89*/
1175                                         0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                 /* 94*/ 0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /* 99*/ 0,
1186                                         0,
1187                                         0,
1188                                         MatConjugate_MPIDense,
1189                                         0,
1190                                 /*104*/ 0,
1191                                         MatRealPart_MPIDense,
1192                                         MatImaginaryPart_MPIDense,
1193                                         0,
1194                                         0,
1195                                 /*109*/ 0,
1196                                         0,
1197                                         0,
1198                                         0,
1199                                         0,
1200                                 /*114*/ 0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                 /*119*/ 0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                 /*124*/ 0,
1211                                         MatGetColumnNorms_MPIDense
1212 };
1213 
1214 EXTERN_C_BEGIN
1215 #undef __FUNCT__
1216 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1217 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1218 {
1219   Mat_MPIDense   *a;
1220   PetscErrorCode ierr;
1221 
1222   PetscFunctionBegin;
1223   mat->preallocated = PETSC_TRUE;
1224   /* Note:  For now, when data is provided by the caller, this assumes the user correctly
1225    allocates the local dense storage space.  We should add error checking. */
1226 
1227   a    = (Mat_MPIDense*)mat->data;
1228   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1229   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1230   a->nvec = mat->cmap->n;
1231 
1232   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1233   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1234   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1235   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1236   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 EXTERN_C_END
1240 
1241 EXTERN_C_BEGIN
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatCreate_MPIDense"
1244 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1245 {
1246   Mat_MPIDense   *a;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1251   mat->data         = (void*)a;
1252   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1253 
1254   mat->insertmode = NOT_SET_VALUES;
1255   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1256   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1257 
1258   /* build cache for off array entries formed */
1259   a->donotstash = PETSC_FALSE;
1260   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1261 
1262   /* stuff used for matrix vector multiply */
1263   a->lvec        = 0;
1264   a->Mvctx       = 0;
1265   a->roworiented = PETSC_TRUE;
1266 
1267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1269 
1270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1271                                            "MatGetDiagonalBlock_MPIDense",
1272                                             MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1274                                            "MatMPIDenseSetPreallocation_MPIDense",
1275                                             MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1277                                            "MatMatMult_MPIAIJ_MPIDense",
1278                                             MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1280                                             "MatMatMultSymbolic_MPIAIJ_MPIDense",
1281                                              MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1283                                            "MatMatMultNumeric_MPIAIJ_MPIDense",
1284                                             MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1285   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 EXTERN_C_END
1289 
1290 /*MC
1291    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1292 
1293    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1294    and MATMPIDENSE otherwise.
1295 
1296    Options Database Keys:
1297 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1298 
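  A minimal sketch of selecting this type at runtime (assumes the usual MatCreate()/MatSetFromOptions() sequence; A, M, and N are placeholders supplied by the caller):
.vb
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* picks up -mat_type dense */
      ierr = MatSetUp(A);CHKERRQ(ierr);
.ve
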
1299   Level: beginner
1300 
1301 
1302 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1303 M*/
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1307 /*@C
1308    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1309 
1310    Not collective
1311 
1312    Input Parameters:
1313 +  A - the matrix
1314 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1315    to control all matrix memory allocation.
1316 
1317    Notes:
1318    The dense format is fully compatible with standard Fortran 77
1319    storage by columns.
1320 
1321    The data input variable is intended primarily for Fortran programmers
1322    who wish to allocate their own matrix memory space.  Most users should
1323    set data=PETSC_NULL.
1324 
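   A typical calling sequence (a sketch only; comm, m, n, M, and N are assumed to be set by the caller):
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);  /* PETSc allocates the local column-major storage */
.ve
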
1325    Level: intermediate
1326 
1327 .keywords: matrix,dense, parallel
1328 
1329 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1330 @*/
1331 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1332 {
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatCreateDense"
1342 /*@C
1343    MatCreateDense - Creates a parallel matrix in dense format.
1344 
1345    Collective on MPI_Comm
1346 
1347    Input Parameters:
1348 +  comm - MPI communicator
1349 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1350 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1351 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1352 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1353 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1354    to control all matrix memory allocation.
1355 
1356    Output Parameter:
1357 .  A - the matrix
1358 
1359    Notes:
1360    The dense format is fully compatible with standard Fortran 77
1361    storage by columns.
1362 
1363    The data input variable is intended primarily for Fortran programmers
1364    who wish to allocate their own matrix memory space.  Most users should
1365    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1366 
1367    The user MUST specify either the local or global matrix dimensions
1368    (possibly both).
1369 
1370    Level: intermediate
1371 
1372 .keywords: matrix, dense, parallel
1373 
1374 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1375 @*/
1376 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1377 {
1378   PetscErrorCode ierr;
1379   PetscMPIInt    size;
1380 
1381   PetscFunctionBegin;
1382   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1383   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1384   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1385   if (size > 1) {
1386     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1387     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1388   } else {
1389     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1390     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
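/*
   Illustrative usage sketch (not part of the library source; the 20x30 global size is an
   arbitrary example).  With PETSC_DECIDE for the local sizes PETSc chooses the row and column
   distribution, and PETSC_NULL lets PETSc allocate the storage; entries would then be set with
   MatSetValues() on locally owned rows before assembly.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,20,30,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/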
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatDuplicate_MPIDense"
1397 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1398 {
1399   Mat            mat;
1400   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   *newmat       = 0;
1405   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1406   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1407   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1408   a                 = (Mat_MPIDense*)mat->data;
1409   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1410 
1411   mat->factortype   = A->factortype;
1412   mat->assembled    = PETSC_TRUE;
1413   mat->preallocated = PETSC_TRUE;
1414 
1415   a->size           = oldmat->size;
1416   a->rank           = oldmat->rank;
1417   mat->insertmode   = NOT_SET_VALUES;
1418   a->nvec           = oldmat->nvec;
1419   a->donotstash     = oldmat->donotstash;
1420 
1421   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1422   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1423 
1424   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1425   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1426   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1427 
1428   *newmat = mat;
1429   PetscFunctionReturn(0);
1430 }
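/*
   The static routine above is reached through the generic MatDuplicate() interface.
   Illustrative call (not part of the library source):

     Mat            B;
     PetscErrorCode ierr;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);

   MAT_COPY_VALUES duplicates both the parallel layout and the numerical values; use
   MAT_DO_NOT_COPY_VALUES to duplicate only the layout.
*/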
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1434 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1435 {
1436   PetscErrorCode ierr;
1437   PetscMPIInt    rank,size;
1438   PetscInt       *rowners,i,m,nz,j;
1439   PetscScalar    *array,*vals,*vals_ptr;
1440 
1441   PetscFunctionBegin;
1442   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1443   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1444 
1445   /* determine ownership of all rows */
1446   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1447   else                     m = newmat->rmap->n;
1448   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1449   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1450   rowners[0] = 0;
1451   for (i=2; i<=size; i++) {
1452     rowners[i] += rowners[i-1];
1453   }
1454 
1455   if (!sizesset) {
1456     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1457   }
1458   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1459   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1460 
1461   if (!rank) {
1462     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1463 
1464     /* read in my part of the matrix numerical values  */
1465     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1466 
1467     /* insert into matrix by row (this is why we cannot read directly into the array) */
1468     vals_ptr = vals;
1469     for (i=0; i<m; i++) {
1470       for (j=0; j<N; j++) {
1471         array[i + j*m] = *vals_ptr++;
1472       }
1473     }
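    /*
       Illustrative mapping (hypothetical sizes): with m = 2 local rows and N = 3 columns the
       row-ordered file values a00,a01,a02,a10,a11,a12 are stored column-major as
       array[0]=a00, array[2]=a01, array[4]=a02, array[1]=a10, array[3]=a11, array[5]=a12,
       i.e. at position i + j*m for row i, column j.
    */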
1474 
1475     /* read in other processors and ship out */
1476     for (i=1; i<size; i++) {
1477       nz   = (rowners[i+1] - rowners[i])*N;
1478       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1479       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1480     }
1481   } else {
1482     /* receive numeric values */
1483     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1484 
1485     /* receive message of values */
1486     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1487 
1488     /* insert into matrix by row (this is why we cannot read directly into the array) */
1489     vals_ptr = vals;
1490     for (i=0; i<m; i++) {
1491       for (j=0; j<N; j++) {
1492         array[i + j*m] = *vals_ptr++;
1493       }
1494     }
1495   }
1496   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1497   ierr = PetscFree(rowners);CHKERRQ(ierr);
1498   ierr = PetscFree(vals);CHKERRQ(ierr);
1499   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1500   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
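/*
   For orientation (not a normative format description): the routine above is entered after the
   caller has already read the binary header (classid, M, N and the marker
   MATRIX_BINARY_FORMAT_DENSE in place of a nonzero count); the remainder of the file holds the
   M*N values with each row stored contiguously, which is why they are transposed here into the
   column-major local arrays.
*/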
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatLoad_MPIDense"
1506 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1507 {
1508   PetscScalar    *vals,*svals;
1509   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1510   MPI_Status     status;
1511   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1512   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1513   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1514   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1515   int            fd;
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1520   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1521   if (!rank) {
1522     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1523     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1524     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1525   }
1526   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1527 
1528   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1529   M = header[1]; N = header[2]; nz = header[3];
1530 
1531   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1532   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1533   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1534 
1535   /* If global sizes are set, check if they are consistent with that given in the file */
1536   if (sizesset) {
1537     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1538   }
1539   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
1540   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
1541 
1542   /*
1543        Handle case where matrix is stored on disk as a dense matrix
1544   */
1545   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1546     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1547     PetscFunctionReturn(0);
1548   }
1549 
1550   /* determine ownership of all rows */
1551   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
1552   else                     m = PetscMPIIntCast(newmat->rmap->n);
1553   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1554   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1555   rowners[0] = 0;
1556   for (i=2; i<=size; i++) {
1557     rowners[i] += rowners[i-1];
1558   }
1559   rstart = rowners[rank];
1560   rend   = rowners[rank+1];
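  /*
     Illustrative example (hypothetical sizes): with size = 3 and local row counts 4, 3 and 3,
     the prefix sum gives rowners = {0,4,7,10}, so rank 1 owns global rows [4,7).
  */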
1561 
1562   /* distribute row lengths to all processors */
1563   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1564   if (!rank) {
1565     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1566     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1567     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1568     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1569     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1570     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1571   } else {
1572     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1573   }
1574 
1575   if (!rank) {
1576     /* calculate the number of nonzeros on each processor */
1577     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1578     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1579     for (i=0; i<size; i++) {
1580       for (j=rowners[i]; j< rowners[i+1]; j++) {
1581         procsnz[i] += rowlengths[j];
1582       }
1583     }
1584     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1585 
1586     /* determine max buffer needed and allocate it */
1587     maxnz = 0;
1588     for (i=0; i<size; i++) {
1589       maxnz = PetscMax(maxnz,procsnz[i]);
1590     }
1591     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1592 
1593     /* read in my part of the matrix column indices  */
1594     nz = procsnz[0];
1595     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1596     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1597 
1598     /* read in everyone else's and ship them off */
1599     for (i=1; i<size; i++) {
1600       nz   = procsnz[i];
1601       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1602       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1603     }
1604     ierr = PetscFree(cols);CHKERRQ(ierr);
1605   } else {
1606     /* determine buffer space needed for message */
1607     nz = 0;
1608     for (i=0; i<m; i++) {
1609       nz += ourlens[i];
1610     }
1611     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1612 
1613     /* receive message of column indices */
1614     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1615     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1616     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Number of column indices received does not match the expected count; file may be corrupt");
1617   }
1618 
1619   /* loop over local rows, determining number of off-diagonal entries */
1620   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1621   jj = 0;
1622   for (i=0; i<m; i++) {
1623     for (j=0; j<ourlens[i]; j++) {
1624       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1625       jj++;
1626     }
1627   }
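  /*
     Illustrative count (hypothetical values): with rstart = 4 and rend = 7, a local row with
     column indices {2,5,9} contributes two off-diagonal entries (columns 2 and 9).
  */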
1628 
1629   /* create our matrix */
1630   for (i=0; i<m; i++) {
1631     ourlens[i] -= offlens[i];
1632   }
1633 
1634   if (!sizesset) {
1635     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1636   }
1637   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1638   for (i=0; i<m; i++) {
1639     ourlens[i] += offlens[i];
1640   }
1641 
1642   if (!rank) {
1643     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1644 
1645     /* read in my part of the matrix numerical values  */
1646     nz = procsnz[0];
1647     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1648 
1649     /* insert into matrix */
1650     jj      = rstart;
1651     smycols = mycols;
1652     svals   = vals;
1653     for (i=0; i<m; i++) {
1654       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1655       smycols += ourlens[i];
1656       svals   += ourlens[i];
1657       jj++;
1658     }
1659 
1660     /* read in other processors and ship out */
1661     for (i=1; i<size; i++) {
1662       nz   = procsnz[i];
1663       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1664       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1665     }
1666     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1667   } else {
1668     /* receive numeric values */
1669     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1670 
1671     /* receive message of values */
1672     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1673     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1674     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match the expected count; file may be corrupt");
1675 
1676     /* insert into matrix */
1677     jj      = rstart;
1678     smycols = mycols;
1679     svals   = vals;
1680     for (i=0; i<m; i++) {
1681       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1682       smycols += ourlens[i];
1683       svals   += ourlens[i];
1684       jj++;
1685     }
1686   }
1687   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1688   ierr = PetscFree(vals);CHKERRQ(ierr);
1689   ierr = PetscFree(mycols);CHKERRQ(ierr);
1690   ierr = PetscFree(rowners);CHKERRQ(ierr);
1691 
1692   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
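/*
   Illustrative usage sketch (not part of the library source; "matrix.dat" is a hypothetical
   file previously written with MatView() on a binary viewer):

     Mat            A;
     PetscViewer    viewer;
     PetscErrorCode ierr;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/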
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatEqual_MPIDense"
1699 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1700 {
1701   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1702   Mat            a,b;
1703   PetscBool      flg;
1704   PetscErrorCode ierr;
1705 
1706   PetscFunctionBegin;
1707   a = matA->A;
1708   b = matB->A;
1709   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1710   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1711   PetscFunctionReturn(0);
1712 }
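/*
   Illustrative usage sketch (not part of the library source): MatEqual() is collective on the
   matrices' communicator, and the result is the logical AND of the per-process comparisons
   performed above.

     PetscBool      eq;
     PetscErrorCode ierr;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
*/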
1713 
1714