
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
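
/*
   Usage sketch (illustrative; not part of the original source). Assuming A is an
   assembled MATMPIDENSE matrix, the local sequential block can be obtained and
   inspected as follows:

      Mat      Aloc;
      PetscInt m,n;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);

   Here Aloc has the local rows of A and all of its columns; it is owned by A and
   must not be destroyed by the caller.
*/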

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END
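
/*
   Usage sketch (illustrative; not part of the original source). The routine above
   is reached through the generic MatGetDiagonalBlock() interface; the returned
   block shares storage with A, is cached on the object, and must not be destroyed
   by the caller:

      Mat diag;
      ierr = MatGetDiagonalBlock(A,&diag);CHKERRQ(ierr);
      ierr = MatView(diag,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/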

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
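
/*
   Usage sketch (illustrative; not part of the original source). For a MATMPIDENSE
   matrix, MatDenseGetArray() exposes the column-major array of the local block,
   which has (local rows) x (global columns) entries; assuming the default leading
   dimension equal to the local row count m, entry (i,j) is array[i + j*m]:

      PetscScalar *array;
      PetscInt    m;
      ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
      array[0] = 1.0;                               first local row, first column
      ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
*/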

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently only supports submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIDENSE so that the correct type is printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense
};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is specified, this assumes the user correctly
     allocates the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode  MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off-array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIDense",
                                           MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                           "MatMPIDenseSetPreallocation_MPIDense",
                                           MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                           "MatMatMult_MPIAIJ_MPIDense",
                                           MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                           "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                           MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                           "MatMatMultNumeric_MPIAIJ_MPIDense",
                                           MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
M*/
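
/*
   Usage sketch (illustrative; not part of the original source): creating a matrix
   whose type is chosen from the options database, so that -mat_type dense resolves
   to MATSEQDENSE or MATMPIDENSE depending on the communicator size (M and N are
   global sizes chosen by the caller):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSetUp(A);CHKERRQ(ierr);
*/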

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
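
/*
   Usage sketch (illustrative; not part of the original source): the usual calling
   sequence, with data=NULL so that PETSc allocates the storage (m and n are the
   local sizes chosen by the caller):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/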
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatCreateDense"
1349 /*@C
1350    MatCreateDense - Creates a parallel matrix in dense format.
1351 
1352    Collective on MPI_Comm
1353 
1354    Input Parameters:
1355 +  comm - MPI communicator
1356 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1357 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1358 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1359 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1360 -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1361    to control all matrix memory allocation.
1362 
1363    Output Parameter:
1364 .  A - the matrix
1365 
1366    Notes:
1367    The dense format is fully compatible with standard Fortran 77
1368    storage by columns.
1369 
1370    The data input variable is intended primarily for Fortran programmers
1371    who wish to allocate their own matrix memory space.  Most users should
1372    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1373 
1374    The user MUST specify either the local or global matrix dimensions
1375    (possibly both).
1376 
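        A minimal usage sketch (the global sizes 100 x 50 are illustrative; error
        checking is omitted):
     $     Mat A;
     $     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);CHKERRQ(ierr);
     $     ierr = MatDestroy(&A);CHKERRQ(ierr);
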
1377    Level: intermediate
1378 
1379 .keywords: matrix,dense, parallel
1380 
1381 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1382 @*/
1383 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1384 {
1385   PetscErrorCode ierr;
1386   PetscMPIInt    size;
1387 
1388   PetscFunctionBegin;
1389   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1390   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1391   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1392   if (size > 1) {
1393     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1394     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1395   } else {
1396     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1397     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatDuplicate_MPIDense"
1404 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1405 {
1406   Mat            mat;
1407   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   *newmat = 0;
1412   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1413   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1414   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1415   a       = (Mat_MPIDense*)mat->data;
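       /* copy the function table so the duplicate also inherits any overridden operations */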
1416   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1417 
1418   mat->factortype   = A->factortype;
1419   mat->assembled    = PETSC_TRUE;
1420   mat->preallocated = PETSC_TRUE;
1421 
1422   a->size         = oldmat->size;
1423   a->rank         = oldmat->rank;
1424   mat->insertmode = NOT_SET_VALUES;
1425   a->nvec         = oldmat->nvec;
1426   a->donotstash   = oldmat->donotstash;
1427 
1428   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1429   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1430 
1431   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1432   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1433   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1434 
1435   *newmat = mat;
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1441 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1442 {
1443   PetscErrorCode ierr;
1444   PetscMPIInt    rank,size;
1445   PetscInt       *rowners,i,m,nz,j;
1446   PetscScalar    *array,*vals,*vals_ptr;
1447 
1448   PetscFunctionBegin;
1449   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1450   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1451 
1452   /* determine ownership of all rows */
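       /* split the rows as evenly as possible: each process gets M/size rows and the
          first M%size ranks get one extra; e.g. (illustrative) M=10 on 3 processes
          gives local sizes 4, 3, 3 */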
1453   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1454   else m = newmat->rmap->n;
1455   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1456   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1457   rowners[0] = 0;
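       /* running sum: afterwards rowners[r] is the first global row owned by rank r,
          i.e. rank r owns rows [rowners[r],rowners[r+1]) */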
1458   for (i=2; i<=size; i++) {
1459     rowners[i] += rowners[i-1];
1460   }
1461 
1462   if (!sizesset) {
1463     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1464   }
1465   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1466   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1467 
1468   if (!rank) {
1469     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1470 
1471     /* read in my part of the matrix numerical values  */
1472     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1473 
1474     /* insert into the matrix row by row (the dense array is stored column-major, which is why we cannot read the file data directly into it) */
1475     vals_ptr = vals;
1476     for (i=0; i<m; i++) {
1477       for (j=0; j<N; j++) {
1478         array[i + j*m] = *vals_ptr++;
1479       }
1480     }
1481 
1482     /* read in other processors and ship out */
1483     for (i=1; i<size; i++) {
1484       nz   = (rowners[i+1] - rowners[i])*N;
1485       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1486       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1487     }
1488   } else {
1489     /* receive numeric values */
1490     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1491 
1492     /* receive the message of values */
1493     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1494 
1495     /* insert into the matrix row by row (the dense array is stored column-major, which is why we cannot read the file data directly into it) */
1496     vals_ptr = vals;
1497     for (i=0; i<m; i++) {
1498       for (j=0; j<N; j++) {
1499         array[i + j*m] = *vals_ptr++;
1500       }
1501     }
1502   }
1503   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1504   ierr = PetscFree(rowners);CHKERRQ(ierr);
1505   ierr = PetscFree(vals);CHKERRQ(ierr);
1506   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1507   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatLoad_MPIDense"
1513 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1514 {
1515   PetscScalar    *vals,*svals;
1516   MPI_Comm       comm;
1517   MPI_Status     status;
1518   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1519   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1520   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1521   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1522   int            fd;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1528   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1529   if (!rank) {
1530     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1531     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1532     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1533   }
1534   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1535 
1536   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1537   M    = header[1]; N = header[2]; nz = header[3];
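       /* the binary header is [MAT_FILE_CLASSID, M, N, nz]; nz == MATRIX_BINARY_FORMAT_DENSE
          flags that the values were written as a dense array rather than as sparse rows */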
1538 
1539   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1540   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1541   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1542 
1543   /* If global sizes are set, check if they are consistent with that given in the file */
1544   if (sizesset) {
1545     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1546   }
1547   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
1548   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1549 
1550   /*
1551        Handle case where matrix is stored on disk as a dense matrix
1552   */
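       /* note: fd was only set on rank 0; MatLoad_MPIDense_DenseInFile reads from it only there */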
1553   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1554     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1555     PetscFunctionReturn(0);
1556   }
1557 
1558   /* determine ownership of all rows */
1559   if (newmat->rmap->n < 0) {
1560     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1561   } else {
1562     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1563   }
1564   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1565   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1566   rowners[0] = 0;
1567   for (i=2; i<=size; i++) {
1568     rowners[i] += rowners[i-1];
1569   }
1570   rstart = rowners[rank];
1571   rend   = rowners[rank+1];
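       /* this process owns global rows [rstart,rend) */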
1572 
1573   /* distribute row lengths to all processors */
1574   ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1575   if (!rank) {
1576     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1577     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1578     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1579     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1580     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1581     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1582   } else {
1583     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1584   }
1585 
1586   if (!rank) {
1587     /* calculate the number of nonzeros on each processor */
1588     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1589     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1590     for (i=0; i<size; i++) {
1591       for (j=rowners[i]; j< rowners[i+1]; j++) {
1592         procsnz[i] += rowlengths[j];
1593       }
1594     }
1595     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1596 
1597     /* determine max buffer needed and allocate it */
1598     maxnz = 0;
1599     for (i=0; i<size; i++) {
1600       maxnz = PetscMax(maxnz,procsnz[i]);
1601     }
1602     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1603 
1604     /* read in my part of the matrix column indices  */
1605     nz   = procsnz[0];
1606     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1607     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1608 
1609     /* read in everyone else's and ship them off */
1610     for (i=1; i<size; i++) {
1611       nz   = procsnz[i];
1612       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1613       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1614     }
1615     ierr = PetscFree(cols);CHKERRQ(ierr);
1616   } else {
1617     /* determine buffer space needed for message */
1618     nz = 0;
1619     for (i=0; i<m; i++) {
1620       nz += ourlens[i];
1621     }
1622     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1623 
1624     /* receive the message of column indices */
1625     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1626     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1627     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1628   }
1629 
1630   /* loop over local rows, determining the number of off-diagonal entries */
1631   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1632   jj   = 0;
1633   for (i=0; i<m; i++) {
1634     for (j=0; j<ourlens[i]; j++) {
1635       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1636       jj++;
1637     }
1638   }
1639 
1640   /* create our matrix */
1641   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1642 
1643   if (!sizesset) {
1644     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1645   }
1646   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1647   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1648 
1649   if (!rank) {
1650     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1651 
1652     /* read in my part of the matrix numerical values  */
1653     nz   = procsnz[0];
1654     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1655 
1656     /* insert into matrix */
1657     jj      = rstart;
1658     smycols = mycols;
1659     svals   = vals;
1660     for (i=0; i<m; i++) {
1661       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1662       smycols += ourlens[i];
1663       svals   += ourlens[i];
1664       jj++;
1665     }
1666 
1667     /* read in other processors and ship out */
1668     for (i=1; i<size; i++) {
1669       nz   = procsnz[i];
1670       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1671       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1672     }
1673     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1674   } else {
1675     /* receive numeric values */
1676     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1677 
1678     /* receive message of values*/
1679     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1680     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1681     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1682 
1683     /* insert into matrix */
1684     jj      = rstart;
1685     smycols = mycols;
1686     svals   = vals;
1687     for (i=0; i<m; i++) {
1688       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1689       smycols += ourlens[i];
1690       svals   += ourlens[i];
1691       jj++;
1692     }
1693   }
1694   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1695   ierr = PetscFree(vals);CHKERRQ(ierr);
1696   ierr = PetscFree(mycols);CHKERRQ(ierr);
1697   ierr = PetscFree(rowners);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatEqual_MPIDense"
1706 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1707 {
1708   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1709   Mat            a,b;
1710   PetscBool      flg;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   a    = matA->A;
1715   b    = matB->A;
1716   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
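       /* the parallel matrices are equal only if every process's local block is equal,
          so combine the local results with a logical AND reduction */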
1717   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1718   PetscFunctionReturn(0);
1719 }
1720 
1721