xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 3bf036e263fd78807e2931ff42d9ddcd8aae3fd4)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14               matrix that represents the locally owned portion of the operator. For a sequential matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the MATSEQDENSE or MATMPIDENSE matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) {
34     *B = mat->A;
35   } else {
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
40 
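/*
   Example (an illustrative usage sketch, not compiled as part of this file): obtaining the
   local dense block of an assembled matrix A and querying its dimensions.  For a MATMPIDENSE
   matrix the local block contains the locally owned rows and all global columns.

     Mat            A,Alocal;
     PetscInt       m,n;
     PetscErrorCode ierr;

     ...  create and assemble A  ...
     ierr = MatDenseGetLocalMatrix(A,&Alocal);CHKERRQ(ierr);
     ierr = MatGetSize(Alocal,&m,&n);CHKERRQ(ierr);
*/
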
41 #undef __FUNCT__
42 #define __FUNCT__ "MatGetRow_MPIDense"
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "MatRestoreRow_MPIDense"
58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
59 {
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
64   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
71 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
72 {
73   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
74   PetscErrorCode ierr;
75   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
76   PetscScalar    *array;
77   MPI_Comm       comm;
78   Mat            B;
79 
80   PetscFunctionBegin;
81   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
82 
83   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
84   if (!B) {
85     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
86     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
87     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
88     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
89     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
90     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
91     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
92     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
93     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
94     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
95     *a = B;
96     ierr = MatDestroy(&B);CHKERRQ(ierr);
97   } else {
98     *a = B;
99   }
100   PetscFunctionReturn(0);
101 }
102 EXTERN_C_END
103 
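/*
   Usage sketch (illustrative only): MatGetDiagonalBlock_MPIDense() above is reached through
   the public MatGetDiagonalBlock() interface.  The returned block stays composed with the
   matrix, so the caller should not destroy it.  A is assumed to be an assembled, square
   MATMPIDENSE matrix.

     Mat Adiag;

     ierr = MatGetDiagonalBlock(A,&Adiag);CHKERRQ(ierr);
*/
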
104 #undef __FUNCT__
105 #define __FUNCT__ "MatSetValues_MPIDense"
106 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
107 {
108   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
109   PetscErrorCode ierr;
110   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
111   PetscBool      roworiented = A->roworiented;
112 
113   PetscFunctionBegin;
114   if (v) PetscValidScalarPointer(v,6);
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         mat->assembled = PETSC_FALSE;
132         if (roworiented) {
133           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
134         } else {
135           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
136         }
137       }
138     }
139   }
140   PetscFunctionReturn(0);
141 }
142 
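/*
   Usage sketch (illustrative only): values destined for rows owned by another process are
   stashed by MatSetValues_MPIDense() and communicated during assembly, so the sequence below
   is valid even when row i is not locally owned.  A is assumed to be a preallocated
   MATMPIDENSE matrix; i, j, and val are placeholder values.

     ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
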
143 #undef __FUNCT__
144 #define __FUNCT__ "MatGetValues_MPIDense"
145 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
146 {
147   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
148   PetscErrorCode ierr;
149   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
150 
151   PetscFunctionBegin;
152   for (i=0; i<m; i++) {
153     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
154     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
155     if (idxm[i] >= rstart && idxm[i] < rend) {
156       row = idxm[i] - rstart;
157       for (j=0; j<n; j++) {
158         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
159         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
161       }
162     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatDenseGetArray_MPIDense"
169 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
170 {
171   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
172   PetscErrorCode ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
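/*
   Usage sketch (illustrative only): direct access to the local dense storage through the
   public MatDenseGetArray()/MatDenseRestoreArray() pair.  The local block is stored by
   columns (Fortran ordering); A is assumed to be a MATDENSE matrix and the array must be
   restored after use.

     PetscScalar *v;

     ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
     ...  read or modify the local entries  ...
     ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
*/
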
179 #undef __FUNCT__
180 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
181 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
182 {
183   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
184   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
185   PetscErrorCode ierr;
186   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
187   const PetscInt *irow,*icol;
188   PetscScalar    *av,*bv,*v = lmat->v;
189   Mat            newmat;
190   IS             iscol_local;
191 
192   PetscFunctionBegin;
193   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
199 
200   /* No parallel redistribution is currently supported! We should really check each index set
201      to confirm that it is OK.  ... Currently this supports only submatrices with the same
202      partitioning as the original matrix! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<Ncols; i++) {
225     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
238   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
239   *B = newmat;
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
245 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
246 {
247   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
248   PetscErrorCode ierr;
249 
250   PetscFunctionBegin;
251   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
257 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
260   MPI_Comm       comm = ((PetscObject)mat)->comm;
261   PetscErrorCode ierr;
262   PetscInt       nstash,reallocs;
263   InsertMode     addv;
264 
265   PetscFunctionBegin;
266   /* make sure all processors are either in INSERTMODE or ADDMODE */
267   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
268   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
269   mat->insertmode = addv; /* in case this processor had no cache */
270 
271   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
272   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
273   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
279 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
280 {
281   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
282   PetscErrorCode  ierr;
283   PetscInt        i,*row,*col,flg,j,rstart,ncols;
284   PetscMPIInt     n;
285   PetscScalar     *val;
286   InsertMode      addv=mat->insertmode;
287 
288   PetscFunctionBegin;
289   /*  wait on receives */
290   while (1) {
291     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
292     if (!flg) break;
293 
294     for (i=0; i<n;) {
295       /* Now identify the consecutive vals belonging to the same row */
296       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
297       if (j < n) ncols = j-i;
298       else       ncols = n-i;
299       /* Now assemble all these values with a single function call */
300       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
301       i = j;
302     }
303   }
304   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
305 
306   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
307   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
308 
309   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
310     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatZeroEntries_MPIDense"
317 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
318 {
319   PetscErrorCode ierr;
320   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
321 
322   PetscFunctionBegin;
323   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 /* the code does not do the diagonal entries correctly unless the
328    matrix is square and the column and row ownerships are identical.
329    This is a BUG. The only way to fix it seems to be to access
330    mdn->A and mdn->B directly and not through the MatZeroRows()
331    routine.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatZeroRows_MPIDense"
335 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
336 {
337   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
338   PetscErrorCode    ierr;
339   PetscInt          i,*owners = A->rmap->range;
340   PetscInt          *nprocs,j,idx,nsends;
341   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
342   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
343   PetscInt          *lens,*lrows,*values;
344   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
345   MPI_Comm          comm = ((PetscObject)A)->comm;
346   MPI_Request       *send_waits,*recv_waits;
347   MPI_Status        recv_status,*send_status;
348   PetscBool         found;
349   const PetscScalar *xx;
350   PetscScalar       *bb;
351 
352   PetscFunctionBegin;
353   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
354   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
355   /*  first count number of contributors to each processor */
356   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
357   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
358   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
359   for (i=0; i<N; i++) {
360     idx = rows[i];
361     found = PETSC_FALSE;
362     for (j=0; j<size; j++) {
363       if (idx >= owners[j] && idx < owners[j+1]) {
364         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
365       }
366     }
367     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
368   }
369   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
370 
371   /* inform other processors of number of messages and max length*/
372   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
373 
374   /* post receives:   */
375   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
376   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
377   for (i=0; i<nrecvs; i++) {
378     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
379   }
380 
381   /* do sends:
382       1) starts[i] gives the starting index in svalues for stuff going to
383          the ith processor
384   */
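  /*
      For example (an illustrative worked case): with 3 processes and per-process counts
      nprocs[2*i] = {2,0,3}, the prefix sums give starts = {0,2,2}, so the rows destined for
      process 0 are packed into svalues[0..1] and those for process 2 into svalues[2..4].
  */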
385   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
386   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
387   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
388   starts[0]  = 0;
389   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
390   for (i=0; i<N; i++) {
391     svalues[starts[owner[i]]++] = rows[i];
392   }
393 
394   starts[0] = 0;
395   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
396   count = 0;
397   for (i=0; i<size; i++) {
398     if (nprocs[2*i+1]) {
399       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
400     }
401   }
402   ierr = PetscFree(starts);CHKERRQ(ierr);
403 
404   base = owners[rank];
405 
406   /*  wait on receives */
407   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
408   count  = nrecvs;
409   slen   = 0;
410   while (count) {
411     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
412     /* unpack receives into our local space */
413     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
414     source[imdex]  = recv_status.MPI_SOURCE;
415     lens[imdex]    = n;
416     slen += n;
417     count--;
418   }
419   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
420 
421   /* move the data into the send scatter */
422   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
423   count = 0;
424   for (i=0; i<nrecvs; i++) {
425     values = rvalues + i*nmax;
426     for (j=0; j<lens[i]; j++) {
427       lrows[count++] = values[j] - base;
428     }
429   }
430   ierr = PetscFree(rvalues);CHKERRQ(ierr);
431   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
432   ierr = PetscFree(owner);CHKERRQ(ierr);
433   ierr = PetscFree(nprocs);CHKERRQ(ierr);
434 
435   /* fix right hand side if needed */
436   if (x && b) {
437     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
439     for (i=0; i<slen; i++) {
440       bb[lrows[i]] = diag*xx[lrows[i]];
441     }
442     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
443     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
444   }
445 
446   /* actually zap the local rows */
447   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
448   if (diag != 0.0) {
449     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
450     PetscInt      m = ll->lda, i;
451 
452     for (i=0; i<slen; i++) {
453       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
454     }
455   }
456   ierr = PetscFree(lrows);CHKERRQ(ierr);
457 
458   /* wait on sends */
459   if (nsends) {
460     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
461     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
462     ierr = PetscFree(send_status);CHKERRQ(ierr);
463   }
464   ierr = PetscFree(send_waits);CHKERRQ(ierr);
465   ierr = PetscFree(svalues);CHKERRQ(ierr);
466 
467   PetscFunctionReturn(0);
468 }
469 
470 #undef __FUNCT__
471 #define __FUNCT__ "MatMult_MPIDense"
472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
473 {
474   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatMultAdd_MPIDense"
486 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
487 {
488   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
489   PetscErrorCode ierr;
490 
491   PetscFunctionBegin;
492   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
494   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatMultTranspose_MPIDense"
500 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
501 {
502   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
503   PetscErrorCode ierr;
504   PetscScalar    zero = 0.0;
505 
506   PetscFunctionBegin;
507   ierr = VecSet(yy,zero);CHKERRQ(ierr);
508   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
509   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
516 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
517 {
518   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
519   PetscErrorCode ierr;
520 
521   PetscFunctionBegin;
522   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
523   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
524   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatGetDiagonal_MPIDense"
531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
532 {
533   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
534   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
535   PetscErrorCode ierr;
536   PetscInt       len,i,n,m = A->rmap->n,radd;
537   PetscScalar    *x,zero = 0.0;
538 
539   PetscFunctionBegin;
540   ierr = VecSet(v,zero);CHKERRQ(ierr);
541   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
542   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
543   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
544   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
545   radd = A->rmap->rstart*m;
546   for (i=0; i<len; i++) {
547     x[i] = aloc->v[radd + i*m + i];
548   }
549   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatDestroy_MPIDense"
555 PetscErrorCode MatDestroy_MPIDense(Mat mat)
556 {
557   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
558   PetscErrorCode ierr;
559 
560   PetscFunctionBegin;
561 
562 #if defined(PETSC_USE_LOG)
563   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
564 #endif
565   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
566   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
567   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
568   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
569 
570   ierr = PetscFree(mat->data);CHKERRQ(ierr);
571   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
573   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatView_MPIDense_Binary"
582 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
583 {
584   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
585   PetscErrorCode    ierr;
586   PetscViewerFormat format;
587   int               fd;
588   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
589   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
590   PetscScalar       *work,*v,*vv;
591   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
592 
593   PetscFunctionBegin;
594   if (mdn->size == 1) {
595     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
596   } else {
597     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
598     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
599     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
600 
601     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
602     if (format == PETSC_VIEWER_NATIVE) {
603 
604       if (!rank) {
605         /* store the matrix as a dense matrix */
606         header[0] = MAT_FILE_CLASSID;
607         header[1] = mat->rmap->N;
608         header[2] = N;
609         header[3] = MATRIX_BINARY_FORMAT_DENSE;
610         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
611 
612         /* get largest work array needed for transposing array */
613         mmax = mat->rmap->n;
614         for (i=1; i<size; i++) {
615           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
616         }
617         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
618 
619         /* write out local array, by rows */
620         m    = mat->rmap->n;
621         v    = a->v;
622         for (j=0; j<N; j++) {
623           for (i=0; i<m; i++) {
624             work[j + i*N] = *v++;
625           }
626         }
627         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
628         /* get largest work array to receive messages from other processes, excludes process zero */
629         mmax = 0;
630         for (i=1; i<size; i++) {
631           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
632         }
633         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
634         for (k = 1; k < size; k++) {
635           v    = vv;
636           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
637           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
638 
639           for (j = 0; j < N; j++) {
640             for (i = 0; i < m; i++) {
641               work[j + i*N] = *v++;
642             }
643           }
644           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
645         }
646         ierr = PetscFree(work);CHKERRQ(ierr);
647         ierr = PetscFree(vv);CHKERRQ(ierr);
648       } else {
649         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
650       }
651     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
658 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
659 {
660   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
661   PetscErrorCode    ierr;
662   PetscMPIInt       size = mdn->size,rank = mdn->rank;
663   PetscViewerType   vtype;
664   PetscBool         iascii,isdraw;
665   PetscViewer       sviewer;
666   PetscViewerFormat format;
667 
668   PetscFunctionBegin;
669   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
670   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
671   if (iascii) {
672     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
673     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
674     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
675       MatInfo info;
676       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
677       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
678       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
679       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
680       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
681       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
682       PetscFunctionReturn(0);
683     } else if (format == PETSC_VIEWER_ASCII_INFO) {
684       PetscFunctionReturn(0);
685     }
686   } else if (isdraw) {
687     PetscDraw  draw;
688     PetscBool  isnull;
689 
690     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
691     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
692     if (isnull) PetscFunctionReturn(0);
693   }
694 
695   if (size == 1) {
696     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
697   } else {
698     /* assemble the entire matrix onto first processor. */
699     Mat         A;
700     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
701     PetscInt    *cols;
702     PetscScalar *vals;
703 
704     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
705     if (!rank) {
706       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
707     } else {
708       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
709     }
710     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
711     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
712     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
713     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
714 
715     /* Copy the matrix ... This isn't the most efficient means,
716        but it's quick for now */
717     A->insertmode = INSERT_VALUES;
718     row = mat->rmap->rstart; m = mdn->A->rmap->n;
719     for (i=0; i<m; i++) {
720       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
721       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
722       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
723       row++;
724     }
725 
726     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
727     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
728     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
729     if (!rank) {
730       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
731       /* Set the type name to MATMPIDENSE so that the correct type is printed by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
732       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
733       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
734     }
735     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
736     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
737     ierr = MatDestroy(&A);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPIDense"
744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745 {
746   PetscErrorCode ierr;
747   PetscBool      iascii,isbinary,isdraw,issocket;
748 
749   PetscFunctionBegin;
750 
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetInfo_MPIDense"
766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767 {
768   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769   Mat            mdn = mat->A;
770   PetscErrorCode ierr;
771   PetscReal      isend[5],irecv[5];
772 
773   PetscFunctionBegin;
774   info->block_size     = 1.0;
775   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
776   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
777   isend[3] = info->memory;  isend[4] = info->mallocs;
778   if (flag == MAT_LOCAL) {
779     info->nz_used      = isend[0];
780     info->nz_allocated = isend[1];
781     info->nz_unneeded  = isend[2];
782     info->memory       = isend[3];
783     info->mallocs      = isend[4];
784   } else if (flag == MAT_GLOBAL_MAX) {
785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
786     info->nz_used      = irecv[0];
787     info->nz_allocated = irecv[1];
788     info->nz_unneeded  = irecv[2];
789     info->memory       = irecv[3];
790     info->mallocs      = irecv[4];
791   } else if (flag == MAT_GLOBAL_SUM) {
792     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   }
799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800   info->fill_ratio_needed = 0;
801   info->factor_mallocs    = 0;
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatSetOption_MPIDense"
807 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
808 {
809   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
810   PetscErrorCode ierr;
811 
812   PetscFunctionBegin;
813   switch (op) {
814   case MAT_NEW_NONZERO_LOCATIONS:
815   case MAT_NEW_NONZERO_LOCATION_ERR:
816   case MAT_NEW_NONZERO_ALLOCATION_ERR:
817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
818     break;
819   case MAT_ROW_ORIENTED:
820     a->roworiented = flg;
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_NEW_DIAGONALS:
824   case MAT_KEEP_NONZERO_PATTERN:
825   case MAT_USE_HASH_TABLE:
826     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
827     break;
828   case MAT_IGNORE_OFF_PROC_ENTRIES:
829     a->donotstash = flg;
830     break;
831   case MAT_SYMMETRIC:
832   case MAT_STRUCTURALLY_SYMMETRIC:
833   case MAT_HERMITIAN:
834   case MAT_SYMMETRY_ETERNAL:
835   case MAT_IGNORE_LOWER_TRIANGULAR:
836     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
837     break;
838   default:
839     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 
845 #undef __FUNCT__
846 #define __FUNCT__ "MatDiagonalScale_MPIDense"
847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
848 {
849   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
850   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
851   PetscScalar    *l,*r,x,*v;
852   PetscErrorCode ierr;
853   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
854 
855   PetscFunctionBegin;
856   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
857   if (ll) {
858     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
859     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
860     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
861     for (i=0; i<m; i++) {
862       x = l[i];
863       v = mat->v + i;
864       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
865     }
866     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
867     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
868   }
869   if (rr) {
870     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
871     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
872     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
875     for (i=0; i<n; i++) {
876       x = r[i];
877       v = mat->v + i*m;
878       for (j=0; j<m; j++) { (*v++) *= x;}
879     }
880     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
881     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
882   }
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatNorm_MPIDense"
888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
889 {
890   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
891   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
892   PetscErrorCode ierr;
893   PetscInt       i,j;
894   PetscReal      sum = 0.0;
895   PetscScalar    *v = mat->v;
896 
897   PetscFunctionBegin;
898   if (mdn->size == 1) {
899     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
900   } else {
901     if (type == NORM_FROBENIUS) {
902       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
903         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
904       }
905       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
906       *nrm = PetscSqrtReal(*nrm);
907       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
908     } else if (type == NORM_1) {
909       PetscReal *tmp,*tmp2;
910       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
911       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
912       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
913       *nrm = 0.0;
914       v = mat->v;
915       for (j=0; j<mdn->A->cmap->n; j++) {
916         for (i=0; i<mdn->A->rmap->n; i++) {
917           tmp[j] += PetscAbsScalar(*v);  v++;
918         }
919       }
920       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
921       for (j=0; j<A->cmap->N; j++) {
922         if (tmp2[j] > *nrm) *nrm = tmp2[j];
923       }
924       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
925       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
926     } else if (type == NORM_INFINITY) { /* max row norm */
927       PetscReal ntemp;
928       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
929       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
930     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatTranspose_MPIDense"
937 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
938 {
939   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
940   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
941   Mat            B;
942   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
943   PetscErrorCode ierr;
944   PetscInt       j,i;
945   PetscScalar    *v;
946 
947   PetscFunctionBegin;
948   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
949   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
950     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
951     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
952     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
953     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
954   } else {
955     B = *matout;
956   }
957 
958   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
959   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
960   for (i=0; i<m; i++) rwork[i] = rstart + i;
961   for (j=0; j<n; j++) {
962     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
963     v   += m;
964   }
965   ierr = PetscFree(rwork);CHKERRQ(ierr);
966   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
969     *matout = B;
970   } else {
971     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
972   }
973   PetscFunctionReturn(0);
974 }
975 
976 
977 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
978 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatSetUp_MPIDense"
982 PetscErrorCode MatSetUp_MPIDense(Mat A)
983 {
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatAXPY_MPIDense"
993 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
994 {
995   PetscErrorCode ierr;
996   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
997 
998   PetscFunctionBegin;
999   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatConjugate_MPIDense"
1005 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1006 {
1007   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatRealPart_MPIDense"
1017 PetscErrorCode MatRealPart_MPIDense(Mat A)
1018 {
1019   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1020   PetscErrorCode ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1029 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1030 {
1031   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1032   PetscErrorCode ierr;
1033 
1034   PetscFunctionBegin;
1035   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1040 #undef __FUNCT__
1041 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1042 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1043 {
1044   PetscErrorCode ierr;
1045   PetscInt       i,n;
1046   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1047   PetscReal      *work;
1048 
1049   PetscFunctionBegin;
1050   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
1051   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1052   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1053   if (type == NORM_2) {
1054     for (i=0; i<n; i++) work[i] *= work[i];
1055   }
1056   if (type == NORM_INFINITY) {
1057     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1058   } else {
1059     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1060   }
1061   ierr = PetscFree(work);CHKERRQ(ierr);
1062   if (type == NORM_2) {
1063     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatSetRandom_MPIDense"
1070 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1071 {
1072   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1073   PetscErrorCode ierr;
1074   PetscScalar    *a;
1075   PetscInt       m,n,i;
1076 
1077   PetscFunctionBegin;
1078   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1079   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1080   for (i=0; i<m*n; i++) {
1081     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1082   }
1083   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 /* -------------------------------------------------------------------*/
1088 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1089        MatGetRow_MPIDense,
1090        MatRestoreRow_MPIDense,
1091        MatMult_MPIDense,
1092 /* 4*/ MatMultAdd_MPIDense,
1093        MatMultTranspose_MPIDense,
1094        MatMultTransposeAdd_MPIDense,
1095        0,
1096        0,
1097        0,
1098 /*10*/ 0,
1099        0,
1100        0,
1101        0,
1102        MatTranspose_MPIDense,
1103 /*15*/ MatGetInfo_MPIDense,
1104        MatEqual_MPIDense,
1105        MatGetDiagonal_MPIDense,
1106        MatDiagonalScale_MPIDense,
1107        MatNorm_MPIDense,
1108 /*20*/ MatAssemblyBegin_MPIDense,
1109        MatAssemblyEnd_MPIDense,
1110        MatSetOption_MPIDense,
1111        MatZeroEntries_MPIDense,
1112 /*24*/ MatZeroRows_MPIDense,
1113        0,
1114        0,
1115        0,
1116        0,
1117 /*29*/ MatSetUp_MPIDense,
1118        0,
1119        0,
1120        0,
1121        0,
1122 /*34*/ MatDuplicate_MPIDense,
1123        0,
1124        0,
1125        0,
1126        0,
1127 /*39*/ MatAXPY_MPIDense,
1128        MatGetSubMatrices_MPIDense,
1129        0,
1130        MatGetValues_MPIDense,
1131        0,
1132 /*44*/ 0,
1133        MatScale_MPIDense,
1134        0,
1135        0,
1136        0,
1137 /*49*/ MatSetRandom_MPIDense,
1138        0,
1139        0,
1140        0,
1141        0,
1142 /*54*/ 0,
1143        0,
1144        0,
1145        0,
1146        0,
1147 /*59*/ MatGetSubMatrix_MPIDense,
1148        MatDestroy_MPIDense,
1149        MatView_MPIDense,
1150        0,
1151        0,
1152 /*64*/ 0,
1153        0,
1154        0,
1155        0,
1156        0,
1157 /*69*/ 0,
1158        0,
1159        0,
1160        0,
1161        0,
1162 /*74*/ 0,
1163        0,
1164        0,
1165        0,
1166        0,
1167 /*79*/ 0,
1168        0,
1169        0,
1170        0,
1171 /*83*/ MatLoad_MPIDense,
1172        0,
1173        0,
1174        0,
1175        0,
1176        0,
1177 /*89*/
1178        0,
1179        0,
1180        0,
1181        0,
1182        0,
1183 /*94*/ 0,
1184        0,
1185        0,
1186        0,
1187        0,
1188 /*99*/ 0,
1189        0,
1190        0,
1191        MatConjugate_MPIDense,
1192        0,
1193 /*104*/0,
1194        MatRealPart_MPIDense,
1195        MatImaginaryPart_MPIDense,
1196        0,
1197        0,
1198 /*109*/0,
1199        0,
1200        0,
1201        0,
1202        0,
1203 /*114*/0,
1204        0,
1205        0,
1206        0,
1207        0,
1208 /*119*/0,
1209        0,
1210        0,
1211        0,
1212        0,
1213 /*124*/0,
1214        MatGetColumnNorms_MPIDense
1215 };
1216 
1217 EXTERN_C_BEGIN
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1220 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1221 {
1222   Mat_MPIDense   *a;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   mat->preallocated = PETSC_TRUE;
1227   /* Note:  For now, when data is specified, this assumes that the user has correctly
1228      allocated the local dense storage space.  We should add error checking. */
1229 
1230   a    = (Mat_MPIDense*)mat->data;
1231   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1232   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1233   a->nvec = mat->cmap->n;
1234 
1235   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1236   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1237   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1238   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1239   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 EXTERN_C_END
1243 
1244 EXTERN_C_BEGIN
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatCreate_MPIDense"
1247 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1248 {
1249   Mat_MPIDense   *a;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1254   mat->data         = (void*)a;
1255   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1256 
1257   mat->insertmode = NOT_SET_VALUES;
1258   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1259   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1260 
1261   /* build cache for off array entries formed */
1262   a->donotstash = PETSC_FALSE;
1263   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1264 
1265   /* stuff used for matrix vector multiply */
1266   a->lvec        = 0;
1267   a->Mvctx       = 0;
1268   a->roworiented = PETSC_TRUE;
1269 
1270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1272 
1273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1274                                      "MatGetDiagonalBlock_MPIDense",
1275                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1277                                      "MatMPIDenseSetPreallocation_MPIDense",
1278                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1280                                      "MatMatMult_MPIAIJ_MPIDense",
1281                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1283                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1284                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1286                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1287                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1288   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1289 
1290   PetscFunctionReturn(0);
1291 }
1292 EXTERN_C_END
1293 
1294 /*MC
1295    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1296 
1297    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1298    and MATMPIDENSE otherwise.
1299 
1300    Options Database Keys:
1301 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1302 
1303   Level: beginner
1304 
1305 
1306 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1307 M*/
1308 
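/*
   Illustrative sketch (not compiled here): selecting the type from the options database, so
   that a run with -mat_type dense uses MATSEQDENSE on one process and MATMPIDENSE on several.
   The global sizes below are placeholders chosen for the example.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/
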
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1311 /*@C
1312    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1313 
1314    Collective on Mat
1315 
1316    Input Parameters:
1317 +  A - the matrix
1318 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1319    to control all matrix memory allocation.
1320 
1321    Notes:
1322    The dense format is fully compatible with standard Fortran 77
1323    storage by columns.
1324 
1325    The data input variable is intended primarily for Fortran programmers
1326    who wish to allocate their own matrix memory space.  Most users should
1327    set data=PETSC_NULL.
1328 
1329    Level: intermediate
1330 
1331 .keywords: matrix,dense, parallel
1332 
1333 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1334 @*/
1335 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1336 {
1337   PetscErrorCode ierr;
1338 
1339   PetscFunctionBegin;
1340   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
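/*
   Illustrative sketch (assumptions noted inline): supplying user-allocated storage for the
   local block.  The array must hold the locally owned rows times all global columns, stored
   by columns (Fortran ordering), and must remain valid while the matrix is in use.  Here m
   and N are placeholder local-row and global-column counts.

     Mat         A;
     PetscScalar *data;

     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/
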
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCreateDense"
1346 /*@C
1347    MatCreateDense - Creates a parallel matrix in dense format.
1348 
1349    Collective on MPI_Comm
1350 
1351    Input Parameters:
1352 +  comm - MPI communicator
1353 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1354 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1355 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1356 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1357 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1358    to control all matrix memory allocation.
1359 
1360    Output Parameter:
1361 .  A - the matrix
1362 
1363    Notes:
1364    The dense format is fully compatible with standard Fortran 77
1365    storage by columns.
1366 
1367    The data input variable is intended primarily for Fortran programmers
1368    who wish to allocate their own matrix memory space.  Most users should
1369    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1370 
1371    The user MUST specify either the local or global matrix dimensions
1372    (possibly both).
1373 
1374    Level: intermediate
1375 
1376 .keywords: matrix,dense, parallel
1377 
1378 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1379 @*/
1380 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1381 {
1382   PetscErrorCode ierr;
1383   PetscMPIInt    size;
1384 
1385   PetscFunctionBegin;
1386   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1387   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1388   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1389   if (size > 1) {
1390     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1391     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1392   } else {
1393     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1394     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
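/*
   Illustrative sketch (not compiled here): creating a parallel dense matrix with PETSc
   managing the storage, then filling and assembling it.  The global sizes 100 x 50 are
   placeholders for the example.

     Mat A;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);
     ...  set entries with MatSetValues()  ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/
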
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatDuplicate_MPIDense"
1401 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1402 {
1403   Mat            mat;
1404   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   *newmat       = 0;
1409   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1410   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1411   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1412   a                 = (Mat_MPIDense*)mat->data;
1413   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1414 
1415   mat->factortype   = A->factortype;
1416   mat->assembled    = PETSC_TRUE;
1417   mat->preallocated = PETSC_TRUE;
1418 
1419   a->size           = oldmat->size;
1420   a->rank           = oldmat->rank;
1421   mat->insertmode   = NOT_SET_VALUES;
1422   a->nvec           = oldmat->nvec;
1423   a->donotstash     = oldmat->donotstash;
1424 
1425   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1426   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1427 
1428   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1429   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1430   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1431 
1432   *newmat = mat;
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1438 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1439 {
1440   PetscErrorCode ierr;
1441   PetscMPIInt    rank,size;
1442   PetscInt       *rowners,i,m,nz,j;
1443   PetscScalar    *array,*vals,*vals_ptr;
1444 
1445   PetscFunctionBegin;
1446   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1447   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1448 
1449   /* determine ownership of all rows */
1450   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1451   else                     m = newmat->rmap->n;
1452   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1453   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1454   rowners[0] = 0;
1455   for (i=2; i<=size; i++) {
1456     rowners[i] += rowners[i-1];
1457   }
1458 
1459   if (!sizesset) {
1460     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1461   }
1462   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1463   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1464 
1465   if (!rank) {
1466     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1467 
1468     /* read in my part of the matrix numerical values  */
1469     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1470 
1471     /* insert into the matrix row by row (the file stores values by rows while the local dense array is column major, so we cannot read directly into the array) */
1472     vals_ptr = vals;
1473     for (i=0; i<m; i++) {
1474       for (j=0; j<N; j++) {
1475         array[i + j*m] = *vals_ptr++;
1476       }
1477     }
1478 
1479     /* read in the other processors' values and ship them out */
1480     for (i=1; i<size; i++) {
1481       nz   = (rowners[i+1] - rowners[i])*N;
1482       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1483       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1484     }
1485   } else {
1486     /* receive numeric values */
1487     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1488 
1489     /* receive message of values */
1490     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1491 
1492     /* insert into the matrix row by row (the values arrive by rows while the local dense array is column major, so we cannot copy directly into the array) */
1493     vals_ptr = vals;
1494     for (i=0; i<m; i++) {
1495       for (j=0; j<N; j++) {
1496         array[i + j*m] = *vals_ptr++;
1497       }
1498     }
1499   }
1500   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1501   ierr = PetscFree(rowners);CHKERRQ(ierr);
1502   ierr = PetscFree(vals);CHKERRQ(ierr);
1503   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 
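
/*
   Layout note (an illustrative aside, not part of the library source): the
   dense binary format stores each local block row by row, while the local
   dense array is stored in column-major order, so entry (i,j) of a local
   block with m rows and N columns is placed at

       array[i + j*m]

   which is exactly what the unpacking loops above do, and why the values
   cannot be read directly into the array.
*/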
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatLoad_MPIDense"
1510 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1511 {
1512   PetscScalar    *vals,*svals;
1513   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1514   MPI_Status     status;
1515   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1516   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1517   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1518   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1519   int            fd;
1520   PetscErrorCode ierr;
1521 
1522   PetscFunctionBegin;
1523   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1524   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1525   if (!rank) {
1526     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1527     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1528     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1529   }
1530   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1531 
1532   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1533   M = header[1]; N = header[2]; nz = header[3];
1534 
1535   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1536   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1537   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1538 
1539   /* If the global sizes are set, check that they are consistent with those given in the file */
1540   if (sizesset) {
1541     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1542   }
1543   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1544   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1545 
1546   /*
1547        Handle case where matrix is stored on disk as a dense matrix
1548   */
1549   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1550     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1551     PetscFunctionReturn(0);
1552   }
1553 
1554   /* determine ownership of all rows */
1555   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
1556   else m = PetscMPIIntCast(newmat->rmap->n);
1557   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1558   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1559   rowners[0] = 0;
1560   for (i=2; i<=size; i++) {
1561     rowners[i] += rowners[i-1];
1562   }
1563   rstart = rowners[rank];
1564   rend   = rowners[rank+1];
1565 
1566   /* distribute row lengths to all processors */
1567   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1568   if (!rank) {
1569     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1570     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1571     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1572     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1573     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1574     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1575   } else {
1576     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1577   }
1578 
1579   if (!rank) {
1580     /* calculate the number of nonzeros on each processor */
1581     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1582     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1583     for (i=0; i<size; i++) {
1584       for (j=rowners[i]; j< rowners[i+1]; j++) {
1585         procsnz[i] += rowlengths[j];
1586       }
1587     }
1588     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1589 
1590     /* determine max buffer needed and allocate it */
1591     maxnz = 0;
1592     for (i=0; i<size; i++) {
1593       maxnz = PetscMax(maxnz,procsnz[i]);
1594     }
1595     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1596 
1597     /* read in my part of the matrix column indices  */
1598     nz = procsnz[0];
1599     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1600     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1601 
1602     /* read in everyone else's column indices and ship them off */
1603     for (i=1; i<size; i++) {
1604       nz   = procsnz[i];
1605       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1606       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1607     }
1608     ierr = PetscFree(cols);CHKERRQ(ierr);
1609   } else {
1610     /* determine buffer space needed for message */
1611     nz = 0;
1612     for (i=0; i<m; i++) {
1613       nz += ourlens[i];
1614     }
1615     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1616 
1617     /* receive message of column indices */
1618     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1619     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1620     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1621   }
1622 
1623   /* loop over local rows, determining the number of off-diagonal entries */
1624   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1625   jj = 0;
1626   for (i=0; i<m; i++) {
1627     for (j=0; j<ourlens[i]; j++) {
1628       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1629       jj++;
1630     }
1631   }
1632 
1633   /* create our matrix */
1634   for (i=0; i<m; i++) {
1635     ourlens[i] -= offlens[i];
1636   }
1637 
1638   if (!sizesset) {
1639     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1640   }
1641   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1642   for (i=0; i<m; i++) {
1643     ourlens[i] += offlens[i];
1644   }
1645 
1646   if (!rank) {
1647     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1648 
1649     /* read in my part of the matrix numerical values  */
1650     nz = procsnz[0];
1651     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1652 
1653     /* insert into matrix */
1654     jj      = rstart;
1655     smycols = mycols;
1656     svals   = vals;
1657     for (i=0; i<m; i++) {
1658       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1659       smycols += ourlens[i];
1660       svals   += ourlens[i];
1661       jj++;
1662     }
1663 
1664     /* read in the other processors' values and ship them out */
1665     for (i=1; i<size; i++) {
1666       nz   = procsnz[i];
1667       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1668       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1669     }
1670     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1671   } else {
1672     /* receive numeric values */
1673     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1674 
1675     /* receive message of values */
1676     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1677     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1678     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1679 
1680     /* insert into matrix */
1681     jj      = rstart;
1682     smycols = mycols;
1683     svals   = vals;
1684     for (i=0; i<m; i++) {
1685       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1686       smycols += ourlens[i];
1687       svals   += ourlens[i];
1688       jj++;
1689     }
1690   }
1691   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1692   ierr = PetscFree(vals);CHKERRQ(ierr);
1693   ierr = PetscFree(mycols);CHKERRQ(ierr);
1694   ierr = PetscFree(rowners);CHKERRQ(ierr);
1695 
1696   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
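
/*
   An illustrative usage sketch (not part of the library source): loading a
   matrix stored in PETSc binary format into an MPI dense matrix. The file
   name is hypothetical.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/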
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatEqual_MPIDense"
1703 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1704 {
1705   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1706   Mat            a,b;
1707   PetscBool      flg;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   a = matA->A;
1712   b = matB->A;
1713   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1714   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 
1718
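
/*
   An illustrative sketch (not part of the library source): MatEqual() is
   collective; each process compares its local block and the results are
   combined with a logical AND, so every process receives the same answer.

     PetscBool equal;

     ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);
     if (!equal) {
       ierr = PetscPrintf(PETSC_COMM_WORLD,"matrices differ\n");CHKERRQ(ierr);
     }
*/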