xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ce94432eddcd14845bc7e8083b7f8ea723b9bf7d)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
14               stores the local (on-process) rows of the operator; for a MATSEQDENSE matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the MATSEQDENSE or MATMPIDENSE matrix
18 
19     Output Parameter:
20 .      B - the inner (local) matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
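
/*
   Illustrative usage sketch (an addition for exposition, not part of the library source):
   a common use of MatDenseGetLocalMatrix() is to obtain the local MATSEQDENSE block so that
   its column-major storage can be inspected on each process.  The sizes M and N and the
   PetscErrorCode ierr are assumed to be declared by the caller.

      Mat         A,Aloc;
      PetscScalar *array;

      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatDenseGetArray(Aloc,&array);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(Aloc,&array);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/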
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRow_MPIDense"
40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
41 {
42   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45 
46   PetscFunctionBegin;
47   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
48   lrow = row - rstart;
49   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatRestoreRow_MPIDense"
55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
56 {
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
61   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
62   PetscFunctionReturn(0);
63 }
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
68 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
69 {
70   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
71   PetscErrorCode ierr;
72   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
73   PetscScalar    *array;
74   MPI_Comm       comm;
75   Mat            B;
76 
77   PetscFunctionBegin;
78   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
79 
80   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
81   if (!B) {
82     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
83     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
84     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
85     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
86     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
87     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
88     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
89     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
90     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
92     *a   = B;
93     ierr = MatDestroy(&B);CHKERRQ(ierr);
94   } else *a = B;
95   PetscFunctionReturn(0);
96 }
97 EXTERN_C_END
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSetValues_MPIDense"
101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102 {
103   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106   PetscBool      roworiented = A->roworiented;
107 
108   PetscFunctionBegin;
109   if (v) PetscValidScalarPointer(v,6);
110   for (i=0; i<m; i++) {
111     if (idxm[i] < 0) continue;
112     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
113     if (idxm[i] >= rstart && idxm[i] < rend) {
114       row = idxm[i] - rstart;
115       if (roworiented) {
116         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
117       } else {
118         for (j=0; j<n; j++) {
119           if (idxn[j] < 0) continue;
120           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
121           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
122         }
123       }
124     } else if (!A->donotstash) {
125       mat->assembled = PETSC_FALSE;
126       if (roworiented) {
127         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
128       } else {
129         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
130       }
131     }
132   }
133   PetscFunctionReturn(0);
134 }
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatGetValues_MPIDense"
138 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
139 {
140   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
141   PetscErrorCode ierr;
142   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
143 
144   PetscFunctionBegin;
145   for (i=0; i<m; i++) {
146     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
147     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
148     if (idxm[i] >= rstart && idxm[i] < rend) {
149       row = idxm[i] - rstart;
150       for (j=0; j<n; j++) {
151         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
152         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
153         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
154       }
155     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
156   }
157   PetscFunctionReturn(0);
158 }
159 
160 #undef __FUNCT__
161 #define __FUNCT__ "MatDenseGetArray_MPIDense"
162 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
163 {
164   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
165   PetscErrorCode ierr;
166 
167   PetscFunctionBegin;
168   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
174 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
175 {
176   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
177   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
178   PetscErrorCode ierr;
179   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
180   const PetscInt *irow,*icol;
181   PetscScalar    *av,*bv,*v = lmat->v;
182   Mat            newmat;
183   IS             iscol_local;
184 
185   PetscFunctionBegin;
186   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
187   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
188   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
190   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
191   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
192 
193   /* No parallel redistribution is currently supported! We should really check each index set
194      to confirm that it is OK ... currently the submatrix must use the same row partitioning as the
195      original matrix! (An illustrative sketch of a conforming call follows this routine.) */
196 
197   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
198   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
199 
200   /* Check submatrix call */
201   if (scall == MAT_REUSE_MATRIX) {
202     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
203     /* Really need to check the row and column sizes! */
204     newmat = *B;
205   } else {
206     /* Create and fill new matrix */
207     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
208     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
209     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
210     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
211   }
212 
213   /* Now extract the data pointers and do the copy, column at a time */
214   newmatd = (Mat_MPIDense*)newmat->data;
215   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
216 
217   for (i=0; i<Ncols; i++) {
218     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
219     for (j=0; j<nrows; j++) {
220       *bv++ = av[irow[j] - rstart];
221     }
222   }
223 
224   /* Assemble the matrices so that the correct flags are set */
225   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
227 
228   /* Free work space */
229   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
230   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
231   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
232   *B   = newmat;
233   PetscFunctionReturn(0);
234 }
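
/*
   Illustrative sketch (an addition for exposition, not part of the library source): because of
   the restriction noted above, the row index set passed to MatGetSubMatrix() for a MATMPIDENSE
   matrix should select only rows owned by the calling process, for example the full local row
   range.  The variables A, ncols and firstcol are assumed to be provided by the caller.

      IS       isrow,iscol;
      Mat      S;
      PetscInt rstart,rend;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,ncols,firstcol,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/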
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
238 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241   PetscErrorCode ierr;
242 
243   PetscFunctionBegin;
244   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
250 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
251 {
252   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
253   MPI_Comm       comm;
254   PetscErrorCode ierr;
255   PetscInt       nstash,reallocs;
256   InsertMode     addv;
257 
258   PetscFunctionBegin;
259   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
260   /* make sure all processes are in the same insert mode (INSERT_VALUES or ADD_VALUES) */
261   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
262   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
263   mat->insertmode = addv; /* in case this processor had no cache */
264 
265   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
266   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
267   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
273 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
274 {
275   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,*row,*col,flg,j,rstart,ncols;
278   PetscMPIInt    n;
279   PetscScalar    *val;
280   InsertMode     addv=mat->insertmode;
281 
282   PetscFunctionBegin;
283   /*  wait on receives */
284   while (1) {
285     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
286     if (!flg) break;
287 
288     for (i=0; i<n;) {
289       /* Now identify the consecutive vals belonging to the same row */
290       for (j=i,rstart=row[j]; j<n; j++) {
291         if (row[j] != rstart) break;
292       }
293       if (j < n) ncols = j-i;
294       else       ncols = n-i;
295       /* Now assemble all these values with a single function call */
296       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
297       i    = j;
298     }
299   }
300   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
301 
302   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
304 
305   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
306     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /* the code does not set the diagonal entries correctly unless the
324    matrix is square and the column and row ownerships are identical.
325    This is a BUG. The only way to fix it seems to be to access
326    the local matrix mdn->A directly and not go through the MatZeroRows()
327    routine.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroRows_MPIDense"
331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
332 {
333   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
334   PetscErrorCode    ierr;
335   PetscInt          i,*owners = A->rmap->range;
336   PetscInt          *nprocs,j,idx,nsends;
337   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
338   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
339   PetscInt          *lens,*lrows,*values;
340   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
341   MPI_Comm          comm;
342   MPI_Request       *send_waits,*recv_waits;
343   MPI_Status        recv_status,*send_status;
344   PetscBool         found;
345   const PetscScalar *xx;
346   PetscScalar       *bb;
347 
348   PetscFunctionBegin;
349   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
350   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
351   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
352   /*  first count number of contributors to each processor */
353   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
354   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
355   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
356   for (i=0; i<N; i++) {
357     idx   = rows[i];
358     found = PETSC_FALSE;
359     for (j=0; j<size; j++) {
360       if (idx >= owners[j] && idx < owners[j+1]) {
361         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
362       }
363     }
364     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
365   }
366   nsends = 0;
367   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
368 
369   /* inform other processors of number of messages and max length*/
370   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
371 
372   /* post receives:   */
373   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
374   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
375   for (i=0; i<nrecvs; i++) {
376     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
377   }
378 
379   /* do sends:
380       1) starts[i] gives the starting index in svalues for stuff going to
381          the ith processor
382   */
383   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
384   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
385   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
386 
387   starts[0] = 0;
388   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
389   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
390 
391   starts[0] = 0;
392   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
393   count = 0;
394   for (i=0; i<size; i++) {
395     if (nprocs[2*i+1]) {
396       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
397     }
398   }
399   ierr = PetscFree(starts);CHKERRQ(ierr);
400 
401   base = owners[rank];
402 
403   /*  wait on receives */
404   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
405   count = nrecvs;
406   slen  = 0;
407   while (count) {
408     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
409     /* unpack receives into our local space */
410     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
411 
412     source[imdex] = recv_status.MPI_SOURCE;
413     lens[imdex]   = n;
414     slen += n;
415     count--;
416   }
417   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
418 
419   /* move the data into the send scatter */
420   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
421   count = 0;
422   for (i=0; i<nrecvs; i++) {
423     values = rvalues + i*nmax;
424     for (j=0; j<lens[i]; j++) {
425       lrows[count++] = values[j] - base;
426     }
427   }
428   ierr = PetscFree(rvalues);CHKERRQ(ierr);
429   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
430   ierr = PetscFree(owner);CHKERRQ(ierr);
431   ierr = PetscFree(nprocs);CHKERRQ(ierr);
432 
433   /* fix right hand side if needed */
434   if (x && b) {
435     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
436     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
437     for (i=0; i<slen; i++) {
438       bb[lrows[i]] = diag*xx[lrows[i]];
439     }
440     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
441     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
442   }
443 
444   /* actually zap the local rows */
445   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
446   if (diag != 0.0) {
447     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
448     PetscInt     m   = ll->lda, i;
449 
450     for (i=0; i<slen; i++) {
451       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
452     }
453   }
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "MatMult_MPIDense"
469 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
470 {
471   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
472   PetscErrorCode ierr;
473 
474   PetscFunctionBegin;
475   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "MatMultAdd_MPIDense"
483 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
484 {
485   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
486   PetscErrorCode ierr;
487 
488   PetscFunctionBegin;
489   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
490   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatMultTranspose_MPIDense"
497 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
498 {
499   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
500   PetscErrorCode ierr;
501   PetscScalar    zero = 0.0;
502 
503   PetscFunctionBegin;
504   ierr = VecSet(yy,zero);CHKERRQ(ierr);
505   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
506   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
507   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
513 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
514 {
515   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
516   PetscErrorCode ierr;
517 
518   PetscFunctionBegin;
519   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
520   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
521   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
522   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatGetDiagonal_MPIDense"
528 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
529 {
530   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
531   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
532   PetscErrorCode ierr;
533   PetscInt       len,i,n,m = A->rmap->n,radd;
534   PetscScalar    *x,zero = 0.0;
535 
536   PetscFunctionBegin;
537   ierr = VecSet(v,zero);CHKERRQ(ierr);
538   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
539   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
540   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
541   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
542   radd = A->rmap->rstart*m;
543   for (i=0; i<len; i++) {
544     x[i] = aloc->v[radd + i*m + i];
545   }
546   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatDestroy_MPIDense"
552 PetscErrorCode MatDestroy_MPIDense(Mat mat)
553 {
554   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
555   PetscErrorCode ierr;
556 
557   PetscFunctionBegin;
558 #if defined(PETSC_USE_LOG)
559   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
560 #endif
561   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
562   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
563   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
564   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
565 
566   ierr = PetscFree(mat->data);CHKERRQ(ierr);
567   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
568   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
569   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",NULL);CHKERRQ(ierr);
570   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
571   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatView_MPIDense_Binary"
578 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
579 {
580   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
581   PetscErrorCode    ierr;
582   PetscViewerFormat format;
583   int               fd;
584   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
585   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
586   PetscScalar       *work,*v,*vv;
587   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
588 
589   PetscFunctionBegin;
590   if (mdn->size == 1) {
591     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
592   } else {
593     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
594     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
595     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
596 
597     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
598     if (format == PETSC_VIEWER_NATIVE) {
599 
600       if (!rank) {
601         /* store the matrix as a dense matrix */
602         header[0] = MAT_FILE_CLASSID;
603         header[1] = mat->rmap->N;
604         header[2] = N;
605         header[3] = MATRIX_BINARY_FORMAT_DENSE;
606         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
607 
608         /* get largest work array needed for transposing array */
609         mmax = mat->rmap->n;
610         for (i=1; i<size; i++) {
611           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
612         }
613         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
614 
615         /* write out local array, by rows */
616         m = mat->rmap->n;
617         v = a->v;
618         for (j=0; j<N; j++) {
619           for (i=0; i<m; i++) {
620             work[j + i*N] = *v++;
621           }
622         }
623         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
624         /* get largest work array to receive messages from other processes, excludes process zero */
625         mmax = 0;
626         for (i=1; i<size; i++) {
627           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
628         }
629         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
630         for (k = 1; k < size; k++) {
631           v    = vv;
632           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
633           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
634 
635           for (j = 0; j < N; j++) {
636             for (i = 0; i < m; i++) {
637               work[j + i*N] = *v++;
638             }
639           }
640           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
641         }
642         ierr = PetscFree(work);CHKERRQ(ierr);
643         ierr = PetscFree(vv);CHKERRQ(ierr);
644       } else {
645         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
646       }
647     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
648   }
649   PetscFunctionReturn(0);
650 }
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
654 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
655 {
656   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
657   PetscErrorCode    ierr;
658   PetscMPIInt       size = mdn->size,rank = mdn->rank;
659   PetscViewerType   vtype;
660   PetscBool         iascii,isdraw;
661   PetscViewer       sviewer;
662   PetscViewerFormat format;
663 
664   PetscFunctionBegin;
665   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
674       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
677       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
678       PetscFunctionReturn(0);
679     } else if (format == PETSC_VIEWER_ASCII_INFO) {
680       PetscFunctionReturn(0);
681     }
682   } else if (isdraw) {
683     PetscDraw draw;
684     PetscBool isnull;
685 
686     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
687     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
688     if (isnull) PetscFunctionReturn(0);
689   }
690 
691   if (size == 1) {
692     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
693   } else {
694     /* assemble the entire matrix onto first processor. */
695     Mat         A;
696     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
697     PetscInt    *cols;
698     PetscScalar *vals;
699 
700     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
701     if (!rank) {
702       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
703     } else {
704       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
705     }
706     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
707     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
708     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
709     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
710 
711     /* Copy the matrix ... This isn't the most efficient means,
712        but it's quick for now */
713     A->insertmode = INSERT_VALUES;
714 
715     row = mat->rmap->rstart;
716     m   = mdn->A->rmap->n;
717     for (i=0; i<m; i++) {
718       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
719       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
720       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
721       row++;
722     }
723 
724     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
725     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
727     if (!rank) {
728       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
729       /* Set the type name to MATMPIDENSE so that the correct type is printed by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
730       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
731       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
732     }
733     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
734     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
735     ierr = MatDestroy(&A);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIDense"
742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
743 {
744   PetscErrorCode ierr;
745   PetscBool      iascii,isbinary,isdraw,issocket;
746 
747   PetscFunctionBegin;
748   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
752 
753   if (iascii || issocket || isdraw) {
754     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
755   } else if (isbinary) {
756     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
757   }
758   PetscFunctionReturn(0);
759 }
760 
761 #undef __FUNCT__
762 #define __FUNCT__ "MatGetInfo_MPIDense"
763 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
764 {
765   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
766   Mat            mdn  = mat->A;
767   PetscErrorCode ierr;
768   PetscReal      isend[5],irecv[5];
769 
770   PetscFunctionBegin;
771   info->block_size = 1.0;
772 
773   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
774 
775   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
776   isend[3] = info->memory;  isend[4] = info->mallocs;
777   if (flag == MAT_LOCAL) {
778     info->nz_used      = isend[0];
779     info->nz_allocated = isend[1];
780     info->nz_unneeded  = isend[2];
781     info->memory       = isend[3];
782     info->mallocs      = isend[4];
783   } else if (flag == MAT_GLOBAL_MAX) {
784     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
785 
786     info->nz_used      = irecv[0];
787     info->nz_allocated = irecv[1];
788     info->nz_unneeded  = irecv[2];
789     info->memory       = irecv[3];
790     info->mallocs      = irecv[4];
791   } else if (flag == MAT_GLOBAL_SUM) {
792     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
793 
794     info->nz_used      = irecv[0];
795     info->nz_allocated = irecv[1];
796     info->nz_unneeded  = irecv[2];
797     info->memory       = irecv[3];
798     info->mallocs      = irecv[4];
799   }
800   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
801   info->fill_ratio_needed = 0;
802   info->factor_mallocs    = 0;
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNCT__
807 #define __FUNCT__ "MatSetOption_MPIDense"
808 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
809 {
810   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   switch (op) {
815   case MAT_NEW_NONZERO_LOCATIONS:
816   case MAT_NEW_NONZERO_LOCATION_ERR:
817   case MAT_NEW_NONZERO_ALLOCATION_ERR:
818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
819     break;
820   case MAT_ROW_ORIENTED:
821     a->roworiented = flg;
822 
823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
824     break;
825   case MAT_NEW_DIAGONALS:
826   case MAT_KEEP_NONZERO_PATTERN:
827   case MAT_USE_HASH_TABLE:
828     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
829     break;
830   case MAT_IGNORE_OFF_PROC_ENTRIES:
831     a->donotstash = flg;
832     break;
833   case MAT_SYMMETRIC:
834   case MAT_STRUCTURALLY_SYMMETRIC:
835   case MAT_HERMITIAN:
836   case MAT_SYMMETRY_ETERNAL:
837   case MAT_IGNORE_LOWER_TRIANGULAR:
838     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
839     break;
840   default:
841     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
842   }
843   PetscFunctionReturn(0);
844 }
845 
846 
847 #undef __FUNCT__
848 #define __FUNCT__ "MatDiagonalScale_MPIDense"
849 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
850 {
851   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
852   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
853   PetscScalar    *l,*r,x,*v;
854   PetscErrorCode ierr;
855   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
856 
857   PetscFunctionBegin;
858   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
859   if (ll) {
860     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
861     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
862     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
863     for (i=0; i<m; i++) {
864       x = l[i];
865       v = mat->v + i;
866       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
867     }
868     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
869     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
870   }
871   if (rr) {
872     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
873     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
874     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
877     for (i=0; i<n; i++) {
878       x = r[i];
879       v = mat->v + i*m;
880       for (j=0; j<m; j++) (*v++) *= x;
881     }
882     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
883     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
884   }
885   PetscFunctionReturn(0);
886 }
887 
888 #undef __FUNCT__
889 #define __FUNCT__ "MatNorm_MPIDense"
890 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
891 {
892   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
893   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
894   PetscErrorCode ierr;
895   PetscInt       i,j;
896   PetscReal      sum = 0.0;
897   PetscScalar    *v  = mat->v;
898 
899   PetscFunctionBegin;
900   if (mdn->size == 1) {
901     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
902   } else {
903     if (type == NORM_FROBENIUS) {
904       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
905         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
906       }
907       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
908       *nrm = PetscSqrtReal(*nrm);
909       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
910     } else if (type == NORM_1) {
911       PetscReal *tmp,*tmp2;
912       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
913       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
914       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
915       *nrm = 0.0;
916       v    = mat->v;
917       for (j=0; j<mdn->A->cmap->n; j++) {
918         for (i=0; i<mdn->A->rmap->n; i++) {
919           tmp[j] += PetscAbsScalar(*v);  v++;
920         }
921       }
922       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
923       for (j=0; j<A->cmap->N; j++) {
924         if (tmp2[j] > *nrm) *nrm = tmp2[j];
925       }
926       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); /* free both halves of the PetscMalloc2() allocation */
927       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
928     } else if (type == NORM_INFINITY) { /* max row norm */
929       PetscReal ntemp;
930       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
931       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
932     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
933   }
934   PetscFunctionReturn(0);
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "MatTranspose_MPIDense"
939 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
940 {
941   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
942   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
943   Mat            B;
944   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
945   PetscErrorCode ierr;
946   PetscInt       j,i;
947   PetscScalar    *v;
948 
949   PetscFunctionBegin;
950   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
951   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
952     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
953     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
954     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
955     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
956   } else {
957     B = *matout;
958   }
959 
960   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
961   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
962   for (i=0; i<m; i++) rwork[i] = rstart + i;
963   for (j=0; j<n; j++) {
964     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
965     v   += m;
966   }
967   ierr = PetscFree(rwork);CHKERRQ(ierr);
968   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
969   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
970   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
971     *matout = B;
972   } else {
973     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 
979 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
980 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatSetUp_MPIDense"
984 PetscErrorCode MatSetUp_MPIDense(Mat A)
985 {
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatAXPY_MPIDense"
995 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
996 {
997   PetscErrorCode ierr;
998   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
999 
1000   PetscFunctionBegin;
1001   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatConjugate_MPIDense"
1007 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1008 {
1009   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatRealPart_MPIDense"
1019 PetscErrorCode MatRealPart_MPIDense(Mat A)
1020 {
1021   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1031 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1032 {
1033   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1034   PetscErrorCode ierr;
1035 
1036   PetscFunctionBegin;
1037   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1044 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1045 {
1046   PetscErrorCode ierr;
1047   PetscInt       i,n;
1048   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1049   PetscReal      *work;
1050 
1051   PetscFunctionBegin;
1052   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1053   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1054   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1055   if (type == NORM_2) {
1056     for (i=0; i<n; i++) work[i] *= work[i];
1057   }
1058   if (type == NORM_INFINITY) {
1059     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1060   } else {
1061     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(work);CHKERRQ(ierr);
1064   if (type == NORM_2) {
1065     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatSetRandom_MPIDense"
1072 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1073 {
1074   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1075   PetscErrorCode ierr;
1076   PetscScalar    *a;
1077   PetscInt       m,n,i;
1078 
1079   PetscFunctionBegin;
1080   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1081   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1082   for (i=0; i<m*n; i++) {
1083     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1084   }
1085   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 /* -------------------------------------------------------------------*/
1090 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1091                                         MatGetRow_MPIDense,
1092                                         MatRestoreRow_MPIDense,
1093                                         MatMult_MPIDense,
1094                                 /*  4*/ MatMultAdd_MPIDense,
1095                                         MatMultTranspose_MPIDense,
1096                                         MatMultTransposeAdd_MPIDense,
1097                                         0,
1098                                         0,
1099                                         0,
1100                                 /* 10*/ 0,
1101                                         0,
1102                                         0,
1103                                         0,
1104                                         MatTranspose_MPIDense,
1105                                 /* 15*/ MatGetInfo_MPIDense,
1106                                         MatEqual_MPIDense,
1107                                         MatGetDiagonal_MPIDense,
1108                                         MatDiagonalScale_MPIDense,
1109                                         MatNorm_MPIDense,
1110                                 /* 20*/ MatAssemblyBegin_MPIDense,
1111                                         MatAssemblyEnd_MPIDense,
1112                                         MatSetOption_MPIDense,
1113                                         MatZeroEntries_MPIDense,
1114                                 /* 24*/ MatZeroRows_MPIDense,
1115                                         0,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                 /* 29*/ MatSetUp_MPIDense,
1120                                         0,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                 /* 34*/ MatDuplicate_MPIDense,
1125                                         0,
1126                                         0,
1127                                         0,
1128                                         0,
1129                                 /* 39*/ MatAXPY_MPIDense,
1130                                         MatGetSubMatrices_MPIDense,
1131                                         0,
1132                                         MatGetValues_MPIDense,
1133                                         0,
1134                                 /* 44*/ 0,
1135                                         MatScale_MPIDense,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                 /* 49*/ MatSetRandom_MPIDense,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                 /* 54*/ 0,
1145                                         0,
1146                                         0,
1147                                         0,
1148                                         0,
1149                                 /* 59*/ MatGetSubMatrix_MPIDense,
1150                                         MatDestroy_MPIDense,
1151                                         MatView_MPIDense,
1152                                         0,
1153                                         0,
1154                                 /* 64*/ 0,
1155                                         0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                 /* 69*/ 0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                 /* 74*/ 0,
1165                                         0,
1166                                         0,
1167                                         0,
1168                                         0,
1169                                 /* 79*/ 0,
1170                                         0,
1171                                         0,
1172                                         0,
1173                                 /* 83*/ MatLoad_MPIDense,
1174                                         0,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                 /* 89*/
1180                                         0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /* 94*/ 0,
1186                                         0,
1187                                         0,
1188                                         0,
1189                                         0,
1190                                 /* 99*/ 0,
1191                                         0,
1192                                         0,
1193                                         MatConjugate_MPIDense,
1194                                         0,
1195                                 /*104*/ 0,
1196                                         MatRealPart_MPIDense,
1197                                         MatImaginaryPart_MPIDense,
1198                                         0,
1199                                         0,
1200                                 /*109*/ 0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                 /*114*/ 0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                 /*119*/ 0,
1211                                         0,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                 /*124*/ 0,
1216                                         MatGetColumnNorms_MPIDense
1217 };
1218 
1219 EXTERN_C_BEGIN
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1222 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1223 {
1224   Mat_MPIDense   *a;
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228   mat->preallocated = PETSC_TRUE;
1229   /* Note:  For now, when data is specified, this assumes that the user has correctly
1230    allocated the local dense storage space.  We should add error checking. */
1231 
1232   a       = (Mat_MPIDense*)mat->data;
1233   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1234   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1235   a->nvec = mat->cmap->n;
1236 
1237   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1238   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1239   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1240   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1241   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 EXTERN_C_END
1245 
1246 EXTERN_C_BEGIN
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatCreate_MPIDense"
1249 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1250 {
1251   Mat_MPIDense   *a;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255   ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1256   mat->data = (void*)a;
1257   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1258 
1259   mat->insertmode = NOT_SET_VALUES;
1260   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1261   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1262 
1263   /* build cache for off-process entries formed during assembly */
1264   a->donotstash = PETSC_FALSE;
1265 
1266   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1267 
1268   /* stuff used for matrix vector multiply */
1269   a->lvec        = 0;
1270   a->Mvctx       = 0;
1271   a->roworiented = PETSC_TRUE;
1272 
1273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1275 
1276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1277                                            "MatGetDiagonalBlock_MPIDense",
1278                                            MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1280                                            "MatMPIDenseSetPreallocation_MPIDense",
1281                                            MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1283                                            "MatMatMult_MPIAIJ_MPIDense",
1284                                            MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1286                                            "MatMatMultSymbolic_MPIAIJ_MPIDense",
1287                                            MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1289                                            "MatMatMultNumeric_MPIAIJ_MPIDense",
1290                                            MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1291   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 EXTERN_C_END
1295 
1296 /*MC
1297    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1298 
1299    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1300    and MATMPIDENSE otherwise.
1301 
1302    Options Database Keys:
1303 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1304 
1305   Level: beginner
1306 
1307 
1308 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1309 M*/
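
/*
   Illustrative sketch (an addition for exposition, not part of the library source): selecting
   MATDENSE resolves to MATSEQDENSE or MATMPIDENSE depending on the size of the communicator.
   The global sizes M and N are assumed to be set by the caller.

      Mat A;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatSetUp(A);CHKERRQ(ierr);

   The type can equally be chosen at run time with -mat_type dense when MatSetFromOptions() is used.
*/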
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1313 /*@C
1314    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1315 
1316    Not collective
1317 
1318    Input Parameters:
1319 +  A - the matrix
1320 -  data - optional location of matrix data.  Set data=NULL for PETSc
1321    to control all matrix memory allocation.
1322 
1323    Notes:
1324    The dense format is fully compatible with standard Fortran 77
1325    storage by columns.
1326 
1327    The data input variable is intended primarily for Fortran programmers
1328    who wish to allocate their own matrix memory space.  Most users should
1329    set data=NULL.
1330 
1331    Level: intermediate
1332 
1333 .keywords: matrix,dense, parallel
1334 
1335 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1336 @*/
1337 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1338 {
1339   PetscErrorCode ierr;
1340 
1341   PetscFunctionBegin;
1342   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
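
/*
   Illustrative sketch (an addition for exposition, not part of the library source): passing
   data=NULL lets PETSc allocate the storage, which is the common case.  A caller may instead
   supply its own column-major array; as set up in MatMPIDenseSetPreallocation_MPIDense() the
   local block has (local rows) x (global columns) entries, so the array must hold at least
   m*N scalars on each process.  The sizes m and N are assumed to be set by the caller.

      Mat         A;
      PetscScalar *mydata;

      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);

   The caller remains responsible for freeing mydata after the matrix has been destroyed.
*/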
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatCreateDense"
1348 /*@C
1349    MatCreateDense - Creates a parallel matrix in dense format.
1350 
1351    Collective on MPI_Comm
1352 
1353    Input Parameters:
1354 +  comm - MPI communicator
1355 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1356 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1357 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1358 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1359 -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1360    to control all matrix memory allocation.
1361 
1362    Output Parameter:
1363 .  A - the matrix
1364 
1365    Notes:
1366    The dense format is fully compatible with standard Fortran 77
1367    storage by columns.
1368 
1369    The data input variable is intended primarily for Fortran programmers
1370    who wish to allocate their own matrix memory space.  Most users should
1371    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1372 
1373    The user MUST specify either the local or global matrix dimensions
1374    (possibly both).
1375 
1376    Level: intermediate
1377 
1378 .keywords: matrix, dense, parallel
1379 
1380 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1381 @*/
1382 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1383 {
1384   PetscErrorCode ierr;
1385   PetscMPIInt    size;
1386 
1387   PetscFunctionBegin;
1388   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1389   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1390   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1391   if (size > 1) {
1392     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1393     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1394   } else {
1395     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1396     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1397   }
1398   PetscFunctionReturn(0);
1399 }
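/*
   Example (editorial sketch, not part of the original source): the convenience routine
   above with PETSc choosing the local row/column split and managing the storage.
   M and N stand for the global dimensions; errors are checked with CHKERRQ().  Values
   would normally be inserted with MatSetValues() before the assembly calls.

      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/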
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatDuplicate_MPIDense"
1403 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1404 {
1405   Mat            mat;
1406   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   *newmat = 0;
1411   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1412   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1413   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1414   a       = (Mat_MPIDense*)mat->data;
1415   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1416 
1417   mat->factortype   = A->factortype;
1418   mat->assembled    = PETSC_TRUE;
1419   mat->preallocated = PETSC_TRUE;
1420 
1421   a->size         = oldmat->size;
1422   a->rank         = oldmat->rank;
1423   mat->insertmode = NOT_SET_VALUES;
1424   a->nvec         = oldmat->nvec;
1425   a->donotstash   = oldmat->donotstash;
1426 
1427   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1428   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1429 
1430   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1431   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1432   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1433 
1434   *newmat = mat;
1435   PetscFunctionReturn(0);
1436 }
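/*
   Example (editorial sketch, not part of the original source): the routine above is
   reached through the generic interface; to copy both the layout and the values of an
   existing MATMPIDENSE matrix A:

      Mat            B;
      PetscErrorCode ierr;

      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);

   Passing MAT_DO_NOT_COPY_VALUES instead duplicates only the layout, without copying
   the numerical values.
*/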
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1440 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1441 {
1442   PetscErrorCode ierr;
1443   PetscMPIInt    rank,size;
1444   PetscInt       *rowners,i,m,nz,j;
1445   PetscScalar    *array,*vals,*vals_ptr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1449   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1450 
1451   /* determine ownership of all rows */
1452   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1453   else m = newmat->rmap->n;
1454   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1455   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1456   rowners[0] = 0;
1457   for (i=2; i<=size; i++) {
1458     rowners[i] += rowners[i-1];
1459   }
1460 
1461   if (!sizesset) {
1462     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1463   }
1464   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1465   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1466 
1467   if (!rank) {
1468     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1469 
1470     /* read in my part of the matrix numerical values  */
1471     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1472 
1473     /* insert into matrix by row (this is why we cannot read the values directly into the array) */
1474     vals_ptr = vals;
1475     for (i=0; i<m; i++) {
1476       for (j=0; j<N; j++) {
1477         array[i + j*m] = *vals_ptr++;
1478       }
1479     }
1480 
1481     /* read in the other processes' parts and ship them out */
1482     for (i=1; i<size; i++) {
1483       nz   = (rowners[i+1] - rowners[i])*N;
1484       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1485       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1486     }
1487   } else {
1488     /* receive numeric values */
1489     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1490 
1491     /* receive message of values */
1492     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1493 
1494     /* insert into matrix by row (this is why we cannot receive the values directly into the array) */
1495     vals_ptr = vals;
1496     for (i=0; i<m; i++) {
1497       for (j=0; j<N; j++) {
1498         array[i + j*m] = *vals_ptr++;
1499       }
1500     }
1501   }
1502   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1503   ierr = PetscFree(rowners);CHKERRQ(ierr);
1504   ierr = PetscFree(vals);CHKERRQ(ierr);
1505   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1506   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
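/*
   Note (editorial addition, not part of the original source): the dense binary format
   stores the values row by row, while the PETSc dense array is column major (Fortran
   ordering), so the loops above perform the transposition

      array[i + j*m] = vals[i*N + j]    for 0 <= i < m, 0 <= j < N

   which is why the values cannot be read from the file, or received, directly into the
   matrix array.
*/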
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "MatLoad_MPIDense"
1512 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1513 {
1514   PetscScalar    *vals,*svals;
1515   MPI_Comm       comm;
1516   MPI_Status     status;
1517   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1518   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1519   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1520   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1521   int            fd;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1527   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1528   if (!rank) {
1529     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1530     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1531     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
1532   }
1533   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1534 
1535   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1536   M    = header[1]; N = header[2]; nz = header[3];
1537 
1538   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1539   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1540   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1541 
1542   /* If global sizes are set, check if they are consistent with that given in the file */
1543   if (sizesset) {
1544     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1545   }
1546   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
1547   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
1548 
1549   /*
1550        Handle case where matrix is stored on disk as a dense matrix
1551   */
1552   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1553     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1554     PetscFunctionReturn(0);
1555   }
1556 
1557   /* determine ownership of all rows */
1558   if (newmat->rmap->n < 0) {
1559     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1560   } else {
1561     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1562   }
1563   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1564   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1565   rowners[0] = 0;
1566   for (i=2; i<=size; i++) {
1567     rowners[i] += rowners[i-1];
1568   }
1569   rstart = rowners[rank];
1570   rend   = rowners[rank+1];
1571 
1572   /* distribute row lengths to all processors */
1573   ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1574   if (!rank) {
1575     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1576     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1577     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1578     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1579     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1580     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1581   } else {
1582     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1583   }
1584 
1585   if (!rank) {
1586     /* calculate the number of nonzeros on each processor */
1587     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1588     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1589     for (i=0; i<size; i++) {
1590       for (j=rowners[i]; j< rowners[i+1]; j++) {
1591         procsnz[i] += rowlengths[j];
1592       }
1593     }
1594     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1595 
1596     /* determine max buffer needed and allocate it */
1597     maxnz = 0;
1598     for (i=0; i<size; i++) {
1599       maxnz = PetscMax(maxnz,procsnz[i]);
1600     }
1601     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1602 
1603     /* read in my part of the matrix column indices  */
1604     nz   = procsnz[0];
1605     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1606     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1607 
1608     /* read in everyone else's column indices and ship them off */
1609     for (i=1; i<size; i++) {
1610       nz   = procsnz[i];
1611       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1612       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1613     }
1614     ierr = PetscFree(cols);CHKERRQ(ierr);
1615   } else {
1616     /* determine buffer space needed for message */
1617     nz = 0;
1618     for (i=0; i<m; i++) {
1619       nz += ourlens[i];
1620     }
1621     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1622 
1623     /* receive message of column indices */
1624     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1625     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1626     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: number of column indices received does not match the row lengths");
1627   }
1628 
1629   /* loop over local rows, determining the number of off-diagonal entries */
1630   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1631   jj   = 0;
1632   for (i=0; i<m; i++) {
1633     for (j=0; j<ourlens[i]; j++) {
1634       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1635       jj++;
1636     }
1637   }
1638 
1639   /* create our matrix */
1640   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1641 
1642   if (!sizesset) {
1643     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1644   }
1645   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1646   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1647 
1648   if (!rank) {
1649     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1650 
1651     /* read in my part of the matrix numerical values  */
1652     nz   = procsnz[0];
1653     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1654 
1655     /* insert into matrix */
1656     jj      = rstart;
1657     smycols = mycols;
1658     svals   = vals;
1659     for (i=0; i<m; i++) {
1660       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1661       smycols += ourlens[i];
1662       svals   += ourlens[i];
1663       jj++;
1664     }
1665 
1666     /* read in the other processes' parts and ship them out */
1667     for (i=1; i<size; i++) {
1668       nz   = procsnz[i];
1669       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1670       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1671     }
1672     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1673   } else {
1674     /* receive numeric values */
1675     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1676 
1677     /* receive message of values */
1678     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1679     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1680     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: number of values received does not match the row lengths");
1681 
1682     /* insert into matrix */
1683     jj      = rstart;
1684     smycols = mycols;
1685     svals   = vals;
1686     for (i=0; i<m; i++) {
1687       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1688       smycols += ourlens[i];
1689       svals   += ourlens[i];
1690       jj++;
1691     }
1692   }
1693   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1694   ierr = PetscFree(vals);CHKERRQ(ierr);
1695   ierr = PetscFree(mycols);CHKERRQ(ierr);
1696   ierr = PetscFree(rowners);CHKERRQ(ierr);
1697 
1698   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
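/*
   Example (editorial sketch, not part of the original source): loading a parallel dense
   matrix from a PETSc binary file.  "matrix.dat" stands for a file previously written
   with MatView() on a binary viewer; errors are checked with CHKERRQ().

      Mat            A;
      PetscViewer    viewer;
      PetscErrorCode ierr;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/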
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatEqual_MPIDense"
1705 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1706 {
1707   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1708   Mat            a,b;
1709   PetscBool      flg;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   a    = matA->A;
1714   b    = matB->A;
1715   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1716   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 
1720