xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 00de8ff0695ff394d09a2c60082aeaab5870b6e2)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
14               matrix that stores the locally owned rows; for a MATSEQDENSE matrix, returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
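
/*
   Usage sketch (illustrative only, not part of the library source): viewing the
   rows owned by this process.  The matrix A is assumed to have been created as
   MATMPIDENSE and assembled elsewhere.

     Mat            Alocal;
     PetscErrorCode ierr;

     ierr = MatDenseGetLocalMatrix(A,&Alocal);CHKERRQ(ierr);
     ierr = MatView(Alocal,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);

   Alocal is not a copy; it is the MATSEQDENSE matrix that stores this process's
   rows, so the caller must not destroy it and no restore call is needed.
*/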
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRow_MPIDense"
40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
41 {
42   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45 
46   PetscFunctionBegin;
47   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
48   lrow = row - rstart;
49   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatRestoreRow_MPIDense"
55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
56 {
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
61   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
62   PetscFunctionReturn(0);
63 }
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
68 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
69 {
70   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
71   PetscErrorCode ierr;
72   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
73   PetscScalar    *array;
74   MPI_Comm       comm;
75   Mat            B;
76 
77   PetscFunctionBegin;
78   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
79 
80   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
81   if (!B) {
82     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
83     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
84     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
85     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
86     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
87     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
88     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
89     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
90     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
92     *a   = B;
93     ierr = MatDestroy(&B);CHKERRQ(ierr);
94   } else *a = B;
95   PetscFunctionReturn(0);
96 }
97 EXTERN_C_END
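
/*
   Usage sketch (illustrative only): retrieving the diagonal block through the
   public interface.  A is assumed to be a square, assembled MATMPIDENSE matrix.

     Mat            Adiag;
     PetscErrorCode ierr;

     ierr = MatGetDiagonalBlock(A,&Adiag);CHKERRQ(ierr);
     ierr = MatView(Adiag,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);

   The block is cached on A under the name "DiagonalBlock", so repeated calls
   return the same sequential matrix and the caller must not destroy it.
*/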
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSetValues_MPIDense"
101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102 {
103   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106   PetscBool      roworiented = A->roworiented;
107 
108   PetscFunctionBegin;
109   if (v) PetscValidScalarPointer(v,6);
110   for (i=0; i<m; i++) {
111     if (idxm[i] < 0) continue;
112     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
113     if (idxm[i] >= rstart && idxm[i] < rend) {
114       row = idxm[i] - rstart;
115       if (roworiented) {
116         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
117       } else {
118         for (j=0; j<n; j++) {
119           if (idxn[j] < 0) continue;
120           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
121           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
122         }
123       }
124     } else if (!A->donotstash) {
125       mat->assembled = PETSC_FALSE;
126       if (roworiented) {
127         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
128       } else {
129         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
130       }
131     }
132   }
133   PetscFunctionReturn(0);
134 }
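
/*
   Usage sketch (illustrative only): because off-process entries are routed
   through the stash above, the usual MatSetValues()/MatAssembly sequence works
   with global indices even when a row is not owned by the calling process.
   A is assumed to be a preallocated MATMPIDENSE matrix with at least two global
   rows and columns; every process inserting the same values is harmless with
   INSERT_VALUES.

     PetscInt       row     = 0,cols[2] = {0,1};
     PetscScalar    vals[2] = {1.0,2.0};
     PetscErrorCode ierr;

     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/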
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatGetValues_MPIDense"
138 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
139 {
140   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
141   PetscErrorCode ierr;
142   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
143 
144   PetscFunctionBegin;
145   for (i=0; i<m; i++) {
146     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
147     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
148     if (idxm[i] >= rstart && idxm[i] < rend) {
149       row = idxm[i] - rstart;
150       for (j=0; j<n; j++) {
151         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
152         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
153         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
154       }
155     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
156   }
157   PetscFunctionReturn(0);
158 }
159 
160 #undef __FUNCT__
161 #define __FUNCT__ "MatDenseGetArray_MPIDense"
162 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
163 {
164   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
165   PetscErrorCode ierr;
166 
167   PetscFunctionBegin;
168   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
169   PetscFunctionReturn(0);
170 }
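
/*
   Usage sketch (illustrative only): direct access to the local storage of a
   MATMPIDENSE matrix A (assumed assembled elsewhere).  The local array holds
   this process's rows and all global columns, stored by columns.

     PetscScalar    *array;
     PetscInt       m,i;
     PetscErrorCode ierr;

     ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
     ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
     for (i=0; i<m; i++) array[i] += 1.0;
     ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);

   The loop shifts the locally owned part of the first global column by one.
*/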
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
174 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
175 {
176   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
177   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
178   PetscErrorCode ierr;
179   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
180   const PetscInt *irow,*icol;
181   PetscScalar    *av,*bv,*v = lmat->v;
182   Mat            newmat;
183   IS             iscol_local;
184 
185   PetscFunctionBegin;
186   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
187   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
188   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
190   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
191   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
192 
193   /* No parallel redistribution is currently supported! Should really check each index set
194      to confirm that it is OK.  ... Currently only supports submatrices with the same row
195      partitioning as the original matrix! */
196 
197   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
198   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
199 
200   /* Check submatrix call */
201   if (scall == MAT_REUSE_MATRIX) {
202     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
203     /* Really need to test the row and column sizes! */
204     newmat = *B;
205   } else {
206     /* Create and fill new matrix */
207     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
208     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
209     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
210     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
211   }
212 
213   /* Now extract the data pointers and do the copy, column at a time */
214   newmatd = (Mat_MPIDense*)newmat->data;
215   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
216 
217   for (i=0; i<Ncols; i++) {
218     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
219     for (j=0; j<nrows; j++) {
220       *bv++ = av[irow[j] - rstart];
221     }
222   }
223 
224   /* Assemble the matrices so that the correct flags are set */
225   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
227 
228   /* Free work space */
229   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
230   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
231   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
232   *B   = newmat;
233   PetscFunctionReturn(0);
234 }
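
/*
   Usage sketch (illustrative only): extracting a submatrix.  Because the routine
   above does no parallel redistribution, the row index set must follow the row
   partitioning of A; here every process selects exactly the rows and columns it
   already owns, so S is simply a copy of A with the same layout.

     Mat            S;
     IS             isrow,iscol;
     PetscInt       rstart,rend,cstart,cend;
     PetscErrorCode ierr;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&S);CHKERRQ(ierr);
*/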
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
238 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241   PetscErrorCode ierr;
242 
243   PetscFunctionBegin;
244   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
250 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
251 {
252   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
253   MPI_Comm       comm;
254   PetscErrorCode ierr;
255   PetscInt       nstash,reallocs;
256   InsertMode     addv;
257 
258   PetscFunctionBegin;
259   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
260   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
261   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
262   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
263   mat->insertmode = addv; /* in case this processor had no cache */
264 
265   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
266   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
267   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
273 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
274 {
275   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,*row,*col,flg,j,rstart,ncols;
278   PetscMPIInt    n;
279   PetscScalar    *val;
280   InsertMode     addv=mat->insertmode;
281 
282   PetscFunctionBegin;
283   /*  wait on receives */
284   while (1) {
285     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
286     if (!flg) break;
287 
288     for (i=0; i<n;) {
289       /* Now identify the consecutive vals belonging to the same row */
290       for (j=i,rstart=row[j]; j<n; j++) {
291         if (row[j] != rstart) break;
292       }
293       if (j < n) ncols = j-i;
294       else       ncols = n-i;
295       /* Now assemble all these values with a single function call */
296       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
297       i    = j;
298     }
299   }
300   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
301 
302   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
304 
305   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
306     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /* the code does not handle the diagonal entries correctly unless the
324    matrix is square and the column and row ownerships are identical.
325    This is a BUG. The only way to fix it seems to be to access
326    mdn->A and mdn->B directly and not through the MatZeroRows()
327    routine.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroRows_MPIDense"
331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
332 {
333   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
334   PetscErrorCode    ierr;
335   PetscInt          i,*owners = A->rmap->range;
336   PetscInt          *nprocs,j,idx,nsends;
337   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
338   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
339   PetscInt          *lens,*lrows,*values;
340   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
341   MPI_Comm          comm;
342   MPI_Request       *send_waits,*recv_waits;
343   MPI_Status        recv_status,*send_status;
344   PetscBool         found;
345   const PetscScalar *xx;
346   PetscScalar       *bb;
347 
348   PetscFunctionBegin;
349   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
350   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
351   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
352   /*  first count number of contributors to each processor */
353   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
354   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
355   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
356   for (i=0; i<N; i++) {
357     idx   = rows[i];
358     found = PETSC_FALSE;
359     for (j=0; j<size; j++) {
360       if (idx >= owners[j] && idx < owners[j+1]) {
361         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
362       }
363     }
364     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
365   }
366   nsends = 0;
367   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
368 
369   /* inform other processors of number of messages and max length*/
370   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
371 
372   /* post receives:   */
373   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
374   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
375   for (i=0; i<nrecvs; i++) {
376     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
377   }
378 
379   /* do sends:
380       1) starts[i] gives the starting index in svalues for stuff going to
381          the ith processor
382   */
383   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
384   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
385   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
386 
387   starts[0] = 0;
388   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
389   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
390 
391   starts[0] = 0;
392   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
393   count = 0;
394   for (i=0; i<size; i++) {
395     if (nprocs[2*i+1]) {
396       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
397     }
398   }
399   ierr = PetscFree(starts);CHKERRQ(ierr);
400 
401   base = owners[rank];
402 
403   /*  wait on receives */
404   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
405   count = nrecvs;
406   slen  = 0;
407   while (count) {
408     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
409     /* unpack receives into our local space */
410     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
411 
412     source[imdex] = recv_status.MPI_SOURCE;
413     lens[imdex]   = n;
414     slen += n;
415     count--;
416   }
417   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
418 
419   /* move the data into the send scatter */
420   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
421   count = 0;
422   for (i=0; i<nrecvs; i++) {
423     values = rvalues + i*nmax;
424     for (j=0; j<lens[i]; j++) {
425       lrows[count++] = values[j] - base;
426     }
427   }
428   ierr = PetscFree(rvalues);CHKERRQ(ierr);
429   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
430   ierr = PetscFree(owner);CHKERRQ(ierr);
431   ierr = PetscFree(nprocs);CHKERRQ(ierr);
432 
433   /* fix right hand side if needed */
434   if (x && b) {
435     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
436     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
437     for (i=0; i<slen; i++) {
438       bb[lrows[i]] = diag*xx[lrows[i]];
439     }
440     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
441     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
442   }
443 
444   /* actually zap the local rows */
445   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
446   if (diag != 0.0) {
447     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
448     PetscInt     m   = ll->lda, i;
449 
450     for (i=0; i<slen; i++) {
451       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
452     }
453   }
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
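
/*
   Usage sketch (illustrative only): zeroing rows of a square, assembled
   MATMPIDENSE matrix A and placing 1.0 on the zeroed diagonal entries.  Rows
   are given by global index; each process here passes its first two owned rows
   (assumed to exist).  Passing NULL for x and b skips the right-hand-side fix.

     PetscInt       rstart,rows[2];
     PetscErrorCode ierr;

     ierr    = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
     rows[0] = rstart;
     rows[1] = rstart + 1;
     ierr    = MatZeroRows(A,2,rows,1.0,NULL,NULL);CHKERRQ(ierr);
*/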
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "MatMult_MPIDense"
469 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
470 {
471   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
472   PetscErrorCode ierr;
473 
474   PetscFunctionBegin;
475   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "MatMultAdd_MPIDense"
483 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
484 {
485   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
486   PetscErrorCode ierr;
487 
488   PetscFunctionBegin;
489   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
490   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatMultTranspose_MPIDense"
497 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
498 {
499   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
500   PetscErrorCode ierr;
501   PetscScalar    zero = 0.0;
502 
503   PetscFunctionBegin;
504   ierr = VecSet(yy,zero);CHKERRQ(ierr);
505   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
506   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
507   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
513 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
514 {
515   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
516   PetscErrorCode ierr;
517 
518   PetscFunctionBegin;
519   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
520   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
521   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
522   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatGetDiagonal_MPIDense"
528 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
529 {
530   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
531   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
532   PetscErrorCode ierr;
533   PetscInt       len,i,n,m = A->rmap->n,radd;
534   PetscScalar    *x,zero = 0.0;
535 
536   PetscFunctionBegin;
537   ierr = VecSet(v,zero);CHKERRQ(ierr);
538   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
539   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
540   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
541   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
542   radd = A->rmap->rstart*m;
543   for (i=0; i<len; i++) {
544     x[i] = aloc->v[radd + i*m + i];
545   }
546   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatDestroy_MPIDense"
552 PetscErrorCode MatDestroy_MPIDense(Mat mat)
553 {
554   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
555   PetscErrorCode ierr;
556 
557   PetscFunctionBegin;
558 #if defined(PETSC_USE_LOG)
559   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
560 #endif
561   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
562   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
563   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
564   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
565 
566   ierr = PetscFree(mat->data);CHKERRQ(ierr);
567   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
568   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",NULL);CHKERRQ(ierr);
570   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
571   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",NULL);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatView_MPIDense_Binary"
578 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
579 {
580   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
581   PetscErrorCode    ierr;
582   PetscViewerFormat format;
583   int               fd;
584   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
585   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
586   PetscScalar       *work,*v,*vv;
587   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
588 
589   PetscFunctionBegin;
590   if (mdn->size == 1) {
591     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
592   } else {
593     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
594     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
595     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
596 
597     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
598     if (format == PETSC_VIEWER_NATIVE) {
599 
600       if (!rank) {
601         /* store the matrix as a dense matrix */
602         header[0] = MAT_FILE_CLASSID;
603         header[1] = mat->rmap->N;
604         header[2] = N;
605         header[3] = MATRIX_BINARY_FORMAT_DENSE;
606         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
607 
608         /* get largest work array needed for transposing array */
609         mmax = mat->rmap->n;
610         for (i=1; i<size; i++) {
611           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
612         }
613         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
614 
615         /* write out local array, by rows */
616         m = mat->rmap->n;
617         v = a->v;
618         for (j=0; j<N; j++) {
619           for (i=0; i<m; i++) {
620             work[j + i*N] = *v++;
621           }
622         }
623         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
624         /* get largest work array to receive messages from other processes, excludes process zero */
625         mmax = 0;
626         for (i=1; i<size; i++) {
627           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
628         }
629         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
630         for (k = 1; k < size; k++) {
631           v    = vv;
632           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
633           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
634 
635           for (j = 0; j < N; j++) {
636             for (i = 0; i < m; i++) {
637               work[j + i*N] = *v++;
638             }
639           }
640           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
641         }
642         ierr = PetscFree(work);CHKERRQ(ierr);
643         ierr = PetscFree(vv);CHKERRQ(ierr);
644       } else {
645         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
646       }
647     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
648   }
649   PetscFunctionReturn(0);
650 }
651 
652 #include <petscdraw.h>
653 #undef __FUNCT__
654 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
655 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
656 {
657   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
658   PetscErrorCode    ierr;
659   PetscMPIInt       size = mdn->size,rank = mdn->rank;
660   PetscViewerType   vtype;
661   PetscBool         iascii,isdraw;
662   PetscViewer       sviewer;
663   PetscViewerFormat format;
664 
665   PetscFunctionBegin;
666   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
667   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
668   if (iascii) {
669     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
670     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
671     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
672       MatInfo info;
673       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
674       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
675       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
676       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
677       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
678       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
679       PetscFunctionReturn(0);
680     } else if (format == PETSC_VIEWER_ASCII_INFO) {
681       PetscFunctionReturn(0);
682     }
683   } else if (isdraw) {
684     PetscDraw draw;
685     PetscBool isnull;
686 
687     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
688     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
689     if (isnull) PetscFunctionReturn(0);
690   }
691 
692   if (size == 1) {
693     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
694   } else {
695     /* assemble the entire matrix onto first processor. */
696     Mat         A;
697     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
698     PetscInt    *cols;
699     PetscScalar *vals;
700 
701     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
702     if (!rank) {
703       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
704     } else {
705       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
706     }
707     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
708     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
709     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
710     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
711 
712     /* Copy the matrix ... This isn't the most efficient means,
713        but it's quick for now */
714     A->insertmode = INSERT_VALUES;
715 
716     row = mat->rmap->rstart;
717     m   = mdn->A->rmap->n;
718     for (i=0; i<m; i++) {
719       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
720       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
721       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
722       row++;
723     }
724 
725     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
727     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
728     if (!rank) {
729       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
730       /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
731       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
732       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
733     }
734     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
736     ierr = MatDestroy(&A);CHKERRQ(ierr);
737   }
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIDense"
743 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
744 {
745   PetscErrorCode ierr;
746   PetscBool      iascii,isbinary,isdraw,issocket;
747 
748   PetscFunctionBegin;
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
753 
754   if (iascii || issocket || isdraw) {
755     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
756   } else if (isbinary) {
757     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatGetInfo_MPIDense"
764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
765 {
766   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
767   Mat            mdn  = mat->A;
768   PetscErrorCode ierr;
769   PetscReal      isend[5],irecv[5];
770 
771   PetscFunctionBegin;
772   info->block_size = 1.0;
773 
774   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
775 
776   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
777   isend[3] = info->memory;  isend[4] = info->mallocs;
778   if (flag == MAT_LOCAL) {
779     info->nz_used      = isend[0];
780     info->nz_allocated = isend[1];
781     info->nz_unneeded  = isend[2];
782     info->memory       = isend[3];
783     info->mallocs      = isend[4];
784   } else if (flag == MAT_GLOBAL_MAX) {
785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
786 
787     info->nz_used      = irecv[0];
788     info->nz_allocated = irecv[1];
789     info->nz_unneeded  = irecv[2];
790     info->memory       = irecv[3];
791     info->mallocs      = irecv[4];
792   } else if (flag == MAT_GLOBAL_SUM) {
793     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
794 
795     info->nz_used      = irecv[0];
796     info->nz_allocated = irecv[1];
797     info->nz_unneeded  = irecv[2];
798     info->memory       = irecv[3];
799     info->mallocs      = irecv[4];
800   }
801   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
802   info->fill_ratio_needed = 0;
803   info->factor_mallocs    = 0;
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatSetOption_MPIDense"
809 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
810 {
811   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
812   PetscErrorCode ierr;
813 
814   PetscFunctionBegin;
815   switch (op) {
816   case MAT_NEW_NONZERO_LOCATIONS:
817   case MAT_NEW_NONZERO_LOCATION_ERR:
818   case MAT_NEW_NONZERO_ALLOCATION_ERR:
819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
820     break;
821   case MAT_ROW_ORIENTED:
822     a->roworiented = flg;
823 
824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
825     break;
826   case MAT_NEW_DIAGONALS:
827   case MAT_KEEP_NONZERO_PATTERN:
828   case MAT_USE_HASH_TABLE:
829     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
830     break;
831   case MAT_IGNORE_OFF_PROC_ENTRIES:
832     a->donotstash = flg;
833     break;
834   case MAT_SYMMETRIC:
835   case MAT_STRUCTURALLY_SYMMETRIC:
836   case MAT_HERMITIAN:
837   case MAT_SYMMETRY_ETERNAL:
838   case MAT_IGNORE_LOWER_TRIANGULAR:
839     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
840     break;
841   default:
842     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatDiagonalScale_MPIDense"
850 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
851 {
852   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
853   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
854   PetscScalar    *l,*r,x,*v;
855   PetscErrorCode ierr;
856   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
857 
858   PetscFunctionBegin;
859   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
860   if (ll) {
861     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
862     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
863     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
864     for (i=0; i<m; i++) {
865       x = l[i];
866       v = mat->v + i;
867       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
868     }
869     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
870     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
871   }
872   if (rr) {
873     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
874     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
875     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
878     for (i=0; i<n; i++) {
879       x = r[i];
880       v = mat->v + i*m;
881       for (j=0; j<m; j++) (*v++) *= x;
882     }
883     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
884     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatNorm_MPIDense"
891 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
892 {
893   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
894   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
895   PetscErrorCode ierr;
896   PetscInt       i,j;
897   PetscReal      sum = 0.0;
898   PetscScalar    *v  = mat->v;
899 
900   PetscFunctionBegin;
901   if (mdn->size == 1) {
902     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
903   } else {
904     if (type == NORM_FROBENIUS) {
905       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
906         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
907       }
908       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
909       *nrm = PetscSqrtReal(*nrm);
910       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
911     } else if (type == NORM_1) {
912       PetscReal *tmp,*tmp2;
913       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
914       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
915       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
916       *nrm = 0.0;
917       v    = mat->v;
918       for (j=0; j<mdn->A->cmap->n; j++) {
919         for (i=0; i<mdn->A->rmap->n; i++) {
920           tmp[j] += PetscAbsScalar(*v);  v++;
921         }
922       }
923       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
924       for (j=0; j<A->cmap->N; j++) {
925         if (tmp2[j] > *nrm) *nrm = tmp2[j];
926       }
927       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
928       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
929     } else if (type == NORM_INFINITY) { /* max row norm */
930       PetscReal ntemp;
931       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
932       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
933     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
934   }
935   PetscFunctionReturn(0);
936 }
937 
938 #undef __FUNCT__
939 #define __FUNCT__ "MatTranspose_MPIDense"
940 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
941 {
942   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
943   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
944   Mat            B;
945   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
946   PetscErrorCode ierr;
947   PetscInt       j,i;
948   PetscScalar    *v;
949 
950   PetscFunctionBegin;
951   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
952   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
953     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
954     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
955     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
956     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
957   } else {
958     B = *matout;
959   }
960 
961   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
962   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
963   for (i=0; i<m; i++) rwork[i] = rstart + i;
964   for (j=0; j<n; j++) {
965     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
966     v   += m;
967   }
968   ierr = PetscFree(rwork);CHKERRQ(ierr);
969   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
970   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
972     *matout = B;
973   } else {
974     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 
980 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
981 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatSetUp_MPIDense"
985 PetscErrorCode MatSetUp_MPIDense(Mat A)
986 {
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatAXPY_MPIDense"
996 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
997 {
998   PetscErrorCode ierr;
999   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1000 
1001   PetscFunctionBegin;
1002   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatConjugate_MPIDense"
1008 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1009 {
1010   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatRealPart_MPIDense"
1020 PetscErrorCode MatRealPart_MPIDense(Mat A)
1021 {
1022   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1023   PetscErrorCode ierr;
1024 
1025   PetscFunctionBegin;
1026   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1032 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1033 {
1034   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1035   PetscErrorCode ierr;
1036 
1037   PetscFunctionBegin;
1038   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1045 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1046 {
1047   PetscErrorCode ierr;
1048   PetscInt       i,n;
1049   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1050   PetscReal      *work;
1051 
1052   PetscFunctionBegin;
1053   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1054   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1055   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1056   if (type == NORM_2) {
1057     for (i=0; i<n; i++) work[i] *= work[i];
1058   }
1059   if (type == NORM_INFINITY) {
1060     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1061   } else {
1062     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1063   }
1064   ierr = PetscFree(work);CHKERRQ(ierr);
1065   if (type == NORM_2) {
1066     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatSetRandom_MPIDense"
1073 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1074 {
1075   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1076   PetscErrorCode ierr;
1077   PetscScalar    *a;
1078   PetscInt       m,n,i;
1079 
1080   PetscFunctionBegin;
1081   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1082   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1083   for (i=0; i<m*n; i++) {
1084     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1085   }
1086   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 /* -------------------------------------------------------------------*/
1091 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1092                                         MatGetRow_MPIDense,
1093                                         MatRestoreRow_MPIDense,
1094                                         MatMult_MPIDense,
1095                                 /*  4*/ MatMultAdd_MPIDense,
1096                                         MatMultTranspose_MPIDense,
1097                                         MatMultTransposeAdd_MPIDense,
1098                                         0,
1099                                         0,
1100                                         0,
1101                                 /* 10*/ 0,
1102                                         0,
1103                                         0,
1104                                         0,
1105                                         MatTranspose_MPIDense,
1106                                 /* 15*/ MatGetInfo_MPIDense,
1107                                         MatEqual_MPIDense,
1108                                         MatGetDiagonal_MPIDense,
1109                                         MatDiagonalScale_MPIDense,
1110                                         MatNorm_MPIDense,
1111                                 /* 20*/ MatAssemblyBegin_MPIDense,
1112                                         MatAssemblyEnd_MPIDense,
1113                                         MatSetOption_MPIDense,
1114                                         MatZeroEntries_MPIDense,
1115                                 /* 24*/ MatZeroRows_MPIDense,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                 /* 29*/ MatSetUp_MPIDense,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                         0,
1125                                 /* 34*/ MatDuplicate_MPIDense,
1126                                         0,
1127                                         0,
1128                                         0,
1129                                         0,
1130                                 /* 39*/ MatAXPY_MPIDense,
1131                                         MatGetSubMatrices_MPIDense,
1132                                         0,
1133                                         MatGetValues_MPIDense,
1134                                         0,
1135                                 /* 44*/ 0,
1136                                         MatScale_MPIDense,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                 /* 49*/ MatSetRandom_MPIDense,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                         0,
1145                                 /* 54*/ 0,
1146                                         0,
1147                                         0,
1148                                         0,
1149                                         0,
1150                                 /* 59*/ MatGetSubMatrix_MPIDense,
1151                                         MatDestroy_MPIDense,
1152                                         MatView_MPIDense,
1153                                         0,
1154                                         0,
1155                                 /* 64*/ 0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                         0,
1160                                 /* 69*/ 0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                         0,
1165                                 /* 74*/ 0,
1166                                         0,
1167                                         0,
1168                                         0,
1169                                         0,
1170                                 /* 79*/ 0,
1171                                         0,
1172                                         0,
1173                                         0,
1174                                 /* 83*/ MatLoad_MPIDense,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                 /* 89*/
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                         0,
1186                                 /* 94*/ 0,
1187                                         0,
1188                                         0,
1189                                         0,
1190                                         0,
1191                                 /* 99*/ 0,
1192                                         0,
1193                                         0,
1194                                         MatConjugate_MPIDense,
1195                                         0,
1196                                 /*104*/ 0,
1197                                         MatRealPart_MPIDense,
1198                                         MatImaginaryPart_MPIDense,
1199                                         0,
1200                                         0,
1201                                 /*109*/ 0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                         0,
1206                                 /*114*/ 0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                         0,
1211                                 /*119*/ 0,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                         0,
1216                                 /*124*/ 0,
1217                                         MatGetColumnNorms_MPIDense
1218 };
1219 
1220 EXTERN_C_BEGIN
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1223 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1224 {
1225   Mat_MPIDense   *a;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   mat->preallocated = PETSC_TRUE;
1230   /* Note:  For now, when data is specified, this assumes that the user has correctly
1231    allocated the local dense storage space.  We should add error checking. */
1232 
1233   a       = (Mat_MPIDense*)mat->data;
1234   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1235   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1236   a->nvec = mat->cmap->n;
1237 
1238   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1239   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1240   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1241   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1242   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 EXTERN_C_END
1246 
1247 EXTERN_C_BEGIN
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatCreate_MPIDense"
1250 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1251 {
1252   Mat_MPIDense   *a;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256   ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1257   mat->data = (void*)a;
1258   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1259 
1260   mat->insertmode = NOT_SET_VALUES;
1261   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1262   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1263 
1264   /* build cache for off array entries formed */
1265   a->donotstash = PETSC_FALSE;
1266 
1267   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1268 
1269   /* stuff used for matrix vector multiply */
1270   a->lvec        = 0;
1271   a->Mvctx       = 0;
1272   a->roworiented = PETSC_TRUE;
1273 
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1276 
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIDense",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C","MatMPIDenseSetPreallocation_MPIDense",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","MatMatMult_MPIAIJ_MPIDense",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","MatMatMultSymbolic_MPIAIJ_MPIDense",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","MatMatMultNumeric_MPIAIJ_MPIDense",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1282   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 EXTERN_C_END
1286 
1287 /*MC
1288    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1289 
1290    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1291    and MATMPIDENSE otherwise.
1292 
1293    Options Database Keys:
1294 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1295 
1296   Level: beginner
1297 
1298 
1299 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1300 M*/
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1304 /*@C
1305    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1306 
1307    Not collective
1308 
1309    Input Parameters:
1310 +  A - the matrix
1311 -  data - optional location of matrix data.  Set data=NULL for PETSc
1312    to control all matrix memory allocation.
1313 
1314    Notes:
1315    The dense format is fully compatible with standard Fortran 77
1316    storage by columns.
1317 
1318    The data input variable is intended primarily for Fortran programmers
1319    who wish to allocate their own matrix memory space.  Most users should
1320    set data=NULL.
1321 
1322    Level: intermediate
1323 
1324 .keywords: matrix,dense, parallel
1325 
1326 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1327 @*/
1328 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1329 {
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
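
/*
   Usage sketch (illustrative only): the typical creation sequence for a parallel
   dense matrix when PETSc manages the storage (data = NULL).

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/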
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatCreateDense"
1339 /*@C
1340    MatCreateDense - Creates a parallel matrix in dense format.
1341 
1342    Collective on MPI_Comm
1343 
1344    Input Parameters:
1345 +  comm - MPI communicator
1346 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1347 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1348 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1349 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1350 -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1351    to control all matrix memory allocation.
1352 
1353    Output Parameter:
1354 .  A - the matrix
1355 
1356    Notes:
1357    The dense format is fully compatible with standard Fortran 77
1358    storage by columns.
1359 
1360    The data input variable is intended primarily for Fortran programmers
1361    who wish to allocate their own matrix memory space.  Most users should
1362    set data=NULL (NULL_SCALAR for Fortran users).
1363 
1364    The user MUST specify either the local or global matrix dimensions
1365    (possibly both).
1366 
1367    Level: intermediate
1368 
1369 .keywords: matrix, dense, parallel
1370 
1371 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1372 @*/
1373 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1374 {
1375   PetscErrorCode ierr;
1376   PetscMPIInt    size;
1377 
1378   PetscFunctionBegin;
1379   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1380   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1381   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1382   if (size > 1) {
1383     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1384     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1385   } else {
1386     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1387     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
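
/*
   Editor's note: a minimal usage sketch (not part of the original source) of the
   convenience routine above, which selects MATSEQDENSE or MATMPIDENSE from the
   communicator size.  The function name ExampleCreateDenseUsage and the global sizes are
   hypothetical; the block is disabled so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleCreateDenseUsage(MPI_Comm comm)
{
  PetscErrorCode ierr;
  Mat            A;

  PetscFunctionBegin;
  /* let PETSc choose the local sizes and manage the storage (data = NULL) */
  ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);CHKERRQ(ierr);
  /* entries would normally be inserted here with MatSetValues() */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif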
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatDuplicate_MPIDense"
1394 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1395 {
1396   Mat            mat;
1397   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1398   PetscErrorCode ierr;
1399 
1400   PetscFunctionBegin;
1401   *newmat = 0;
1402   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1403   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1404   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1405   a       = (Mat_MPIDense*)mat->data;
1406   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1407 
1408   mat->factortype   = A->factortype;
1409   mat->assembled    = PETSC_TRUE;
1410   mat->preallocated = PETSC_TRUE;
1411 
1412   a->size         = oldmat->size;
1413   a->rank         = oldmat->rank;
1414   mat->insertmode = NOT_SET_VALUES;
1415   a->nvec         = oldmat->nvec;
1416   a->donotstash   = oldmat->donotstash;
1417 
1418   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1419   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1420 
1421   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1422   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1423   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1424 
1425   *newmat = mat;
1426   PetscFunctionReturn(0);
1427 }
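
/*
   Editor's note: MatDuplicate_MPIDense() above is reached through the generic
   MatDuplicate() interface; the duplicate shares the original's row and column layouts
   (PetscLayoutReference) and copies or zeros the local dense block.  A minimal usage
   sketch (not part of the original source); the function name ExampleDuplicateDense is
   hypothetical and the block is disabled so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleDuplicateDense(Mat A)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);   /* B has A's layout and values */
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif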
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1431 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1432 {
1433   PetscErrorCode ierr;
1434   PetscMPIInt    rank,size;
1435   PetscInt       *rowners,i,m,nz,j;
1436   PetscScalar    *array,*vals,*vals_ptr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1440   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1441 
1442   /* determine ownership of all rows */
1443   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1444   else m = newmat->rmap->n;
1445   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1446   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1447   rowners[0] = 0;
1448   for (i=2; i<=size; i++) {
1449     rowners[i] += rowners[i-1];
1450   }
1451 
1452   if (!sizesset) {
1453     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1454   }
1455   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1456   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1457 
1458   if (!rank) {
1459     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1460 
1461     /* read in my part of the matrix numerical values  */
1462     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1463 
1464     /* insert into matrix by row (this is why we cannot read the file directly into the column-major array) */
1465     vals_ptr = vals;
1466     for (i=0; i<m; i++) {
1467       for (j=0; j<N; j++) {
1468         array[i + j*m] = *vals_ptr++;
1469       }
1470     }
1471 
1472     /* read in other processors and ship out */
1473     for (i=1; i<size; i++) {
1474       nz   = (rowners[i+1] - rowners[i])*N;
1475       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1476       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1477     }
1478   } else {
1479     /* receive numeric values */
1480     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1481 
1482     /* receive message of values */
1483     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1484 
1485     /* insert into matrix by row (this is why we cannot read the file directly into the column-major array) */
1486     vals_ptr = vals;
1487     for (i=0; i<m; i++) {
1488       for (j=0; j<N; j++) {
1489         array[i + j*m] = *vals_ptr++;
1490       }
1491     }
1492   }
1493   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1494   ierr = PetscFree(rowners);CHKERRQ(ierr);
1495   ierr = PetscFree(vals);CHKERRQ(ierr);
1496   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1497   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
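
/*
   Editor's note (not part of the original source): with m local rows, the local dense
   array is stored column-major, so local row i, column j lives at array[i + j*m].  The
   binary file stores the values row by row, which is why the loops above transpose as
   they copy instead of reading straight into array.  For example, with m = 2, N = 3 the
   file order  a00 a01 a02 a10 a11 a12  ends up as array = { a00, a10, a01, a11, a02, a12 }.
*/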
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatLoad_MPIDense"
1503 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1504 {
1505   PetscScalar    *vals,*svals;
1506   MPI_Comm       comm;
1507   MPI_Status     status;
1508   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1509   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1510   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1511   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1512   int            fd;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
1516   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1518   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1519   if (!rank) {
1520     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1521     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1522     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1523   }
1524   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1525 
1526   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1527   M    = header[1]; N = header[2]; nz = header[3];
1528 
1529   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1530   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1531   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1532 
1533   /* If global sizes are set, check if they are consistent with that given in the file */
1534   if (sizesset) {
1535     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1536   }
1537   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
1538   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
1539 
1540   /*
1541        Handle case where matrix is stored on disk as a dense matrix
1542   */
1543   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1544     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1545     PetscFunctionReturn(0);
1546   }
1547 
1548   /* determine ownership of all rows */
1549   if (newmat->rmap->n < 0) {
1550     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1551   } else {
1552     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1553   }
1554   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1555   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1556   rowners[0] = 0;
1557   for (i=2; i<=size; i++) {
1558     rowners[i] += rowners[i-1];
1559   }
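  /* rowners[p] is now the first global row owned by rank p (prefix sums of the gathered local row counts) */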
1560   rstart = rowners[rank];
1561   rend   = rowners[rank+1];
1562 
1563   /* distribute row lengths to all processors */
1564   ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1565   if (!rank) {
1566     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1567     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1568     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1569     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1570     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1571     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1572   } else {
1573     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1574   }
1575 
1576   if (!rank) {
1577     /* calculate the number of nonzeros on each processor */
1578     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1579     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1580     for (i=0; i<size; i++) {
1581       for (j=rowners[i]; j< rowners[i+1]; j++) {
1582         procsnz[i] += rowlengths[j];
1583       }
1584     }
1585     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1586 
1587     /* determine max buffer needed and allocate it */
1588     maxnz = 0;
1589     for (i=0; i<size; i++) {
1590       maxnz = PetscMax(maxnz,procsnz[i]);
1591     }
1592     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1593 
1594     /* read in my part of the matrix column indices  */
1595     nz   = procsnz[0];
1596     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1597     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1598 
1599     /* read in everyone else's part and ship it off */
1600     for (i=1; i<size; i++) {
1601       nz   = procsnz[i];
1602       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1603       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1604     }
1605     ierr = PetscFree(cols);CHKERRQ(ierr);
1606   } else {
1607     /* determine buffer space needed for message */
1608     nz = 0;
1609     for (i=0; i<m; i++) {
1610       nz += ourlens[i];
1611     }
1612     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1613 
1614     /* receive message of column indices */
1615     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1616     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1617     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1618   }
1619 
1620   /* loop over local rows, determining number of off-diagonal entries */
1621   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1622   jj   = 0;
1623   for (i=0; i<m; i++) {
1624     for (j=0; j<ourlens[i]; j++) {
1625       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1626       jj++;
1627     }
1628   }
1629 
1630   /* create our matrix */
1631   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1632 
1633   if (!sizesset) {
1634     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1635   }
1636   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1637   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1638 
1639   if (!rank) {
1640     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1641 
1642     /* read in my part of the matrix numerical values  */
1643     nz   = procsnz[0];
1644     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1645 
1646     /* insert into matrix */
1647     jj      = rstart;
1648     smycols = mycols;
1649     svals   = vals;
1650     for (i=0; i<m; i++) {
1651       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1652       smycols += ourlens[i];
1653       svals   += ourlens[i];
1654       jj++;
1655     }
1656 
1657     /* read in other processors and ship out */
1658     for (i=1; i<size; i++) {
1659       nz   = procsnz[i];
1660       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1661       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1662     }
1663     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1664   } else {
1665     /* receive numeric values */
1666     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1667 
1668     /* receive message of values */
1669     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1670     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1671     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1672 
1673     /* insert into matrix */
1674     jj      = rstart;
1675     smycols = mycols;
1676     svals   = vals;
1677     for (i=0; i<m; i++) {
1678       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1679       smycols += ourlens[i];
1680       svals   += ourlens[i];
1681       jj++;
1682     }
1683   }
1684   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1685   ierr = PetscFree(vals);CHKERRQ(ierr);
1686   ierr = PetscFree(mycols);CHKERRQ(ierr);
1687   ierr = PetscFree(rowners);CHKERRQ(ierr);
1688 
1689   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
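
/*
   Editor's note: a minimal loading sketch (not part of the original source), assuming a
   file written previously with MatView() on a binary viewer.  The function name
   ExampleLoadMPIDense and the file name "matrix.dat" are hypothetical; the block is
   disabled so it does not affect the build.
*/
#if 0
static PetscErrorCode ExampleLoadMPIDense(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatLoad(*A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif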
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatEqual_MPIDense"
1696 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1697 {
1698   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1699   Mat            a,b;
1700   PetscBool      flg;
1701   PetscErrorCode ierr;
1702 
1703   PetscFunctionBegin;
1704   a    = matA->A;
1705   b    = matB->A;
1706   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1707   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
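
/*
   Editor's note: MatEqual_MPIDense() above compares only the local blocks and then
   combines the per-rank results with an MPI_LAND reduction, so every rank gets the same
   answer.  A minimal usage sketch (not part of the original source); the function name
   ExampleCompareDense is hypothetical and the block is disabled so it does not affect
   the build.
*/
#if 0
static PetscErrorCode ExampleCompareDense(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscBool      eq;

  PetscFunctionBegin;
  ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);   /* collective; eq is identical on all ranks */
  if (eq) { ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"matrices are equal\n");CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
#endif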
1710 
1711