xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator; for a sequential matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the MATSEQDENSE or MATMPIDENSE matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
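/*
   Editor's note: a hedged usage sketch, not part of the original source, showing how the local
   sequential matrix of a MATSEQDENSE or MATMPIDENSE matrix might be obtained and its raw
   column-major storage (local rows by global columns) accessed, assuming the usual
   PetscErrorCode ierr declaration.

     Mat            A,Aloc;
     PetscScalar    *array;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatDenseGetArray(Aloc,&array);CHKERRQ(ierr);
     ierr = MatDenseRestoreArray(Aloc,&array);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/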
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRow_MPIDense"
40 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
41 {
42   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
45 
46   PetscFunctionBegin;
47   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
48   lrow = row - rstart;
49   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatRestoreRow_MPIDense"
55 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
56 {
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
61   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
62   PetscFunctionReturn(0);
63 }
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
68 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
69 {
70   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
71   PetscErrorCode ierr;
72   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
73   PetscScalar    *array;
74   MPI_Comm       comm;
75   Mat            B;
76 
77   PetscFunctionBegin;
78   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
79 
80   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
81   if (!B) {
82     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
83     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
84     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
85     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
86     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
87     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
88     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
89     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
90     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
92     *a   = B;
93     ierr = MatDestroy(&B);CHKERRQ(ierr);
94   } else *a = B;
95   PetscFunctionReturn(0);
96 }
97 EXTERN_C_END
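/*
   Editor's note: a hedged usage sketch, not part of the original source.  The routine above is
   reached through the public MatGetDiagonalBlock() interface; A is assumed to be an assembled
   square MATMPIDENSE matrix.  Because the block is composed with A above, the caller does not
   destroy it.

     Mat Adiag;

     ierr = MatGetDiagonalBlock(A,&Adiag);CHKERRQ(ierr);
     ierr = MatView(Adiag,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/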
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSetValues_MPIDense"
101 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
102 {
103   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
104   PetscErrorCode ierr;
105   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
106   PetscBool      roworiented = A->roworiented;
107 
108   PetscFunctionBegin;
109   if (v) PetscValidScalarPointer(v,6);
110   for (i=0; i<m; i++) {
111     if (idxm[i] < 0) continue;
112     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
113     if (idxm[i] >= rstart && idxm[i] < rend) {
114       row = idxm[i] - rstart;
115       if (roworiented) {
116         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
117       } else {
118         for (j=0; j<n; j++) {
119           if (idxn[j] < 0) continue;
120           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
121           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
122         }
123       }
124     } else if (!A->donotstash) {
125       mat->assembled = PETSC_FALSE;
126       if (roworiented) {
127         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
128       } else {
129         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
130       }
131     }
132   }
133   PetscFunctionReturn(0);
134 }
135 
136 #undef __FUNCT__
137 #define __FUNCT__ "MatGetValues_MPIDense"
138 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
139 {
140   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
141   PetscErrorCode ierr;
142   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
143 
144   PetscFunctionBegin;
145   for (i=0; i<m; i++) {
146     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
147     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
148     if (idxm[i] >= rstart && idxm[i] < rend) {
149       row = idxm[i] - rstart;
150       for (j=0; j<n; j++) {
151         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
152         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
153         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
154       }
155     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
156   }
157   PetscFunctionReturn(0);
158 }
159 
160 #undef __FUNCT__
161 #define __FUNCT__ "MatDenseGetArray_MPIDense"
162 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
163 {
164   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
165   PetscErrorCode ierr;
166 
167   PetscFunctionBegin;
168   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
169   PetscFunctionReturn(0);
170 }
171 
172 #undef __FUNCT__
173 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
174 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
175 {
176   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
177   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
178   PetscErrorCode ierr;
179   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
180   const PetscInt *irow,*icol;
181   PetscScalar    *av,*bv,*v = lmat->v;
182   Mat            newmat;
183   IS             iscol_local;
184 
185   PetscFunctionBegin;
186   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
187   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
188   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
190   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
191   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
192 
193   /* No parallel redistribution is currently supported! We should really check each index set
194      to confirm that it is OK.  Currently this supports only submatrices with the same row
195      partitioning as the original matrix! */
196 
197   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
198   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
199 
200   /* Check submatrix call */
201   if (scall == MAT_REUSE_MATRIX) {
202     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
203     /* Really need to test rows and column sizes! */
204     newmat = *B;
205   } else {
206     /* Create and fill new matrix */
207     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
208     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
209     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
210     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
211   }
212 
213   /* Now extract the data pointers and do the copy, column at a time */
214   newmatd = (Mat_MPIDense*)newmat->data;
215   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
216 
217   for (i=0; i<Ncols; i++) {
218     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
219     for (j=0; j<nrows; j++) {
220       *bv++ = av[irow[j] - rstart];
221     }
222   }
223 
224   /* Assemble the matrices so that the correct flags are set */
225   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
227 
228   /* Free work space */
229   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
230   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
231   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
232   *B   = newmat;
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
238 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241   PetscErrorCode ierr;
242 
243   PetscFunctionBegin;
244   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
250 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
251 {
252   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
253   MPI_Comm       comm = ((PetscObject)mat)->comm;
254   PetscErrorCode ierr;
255   PetscInt       nstash,reallocs;
256   InsertMode     addv;
257 
258   PetscFunctionBegin;
259   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
260   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273 {
274   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275   PetscErrorCode ierr;
276   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277   PetscMPIInt    n;
278   PetscScalar    *val;
279   InsertMode     addv=mat->insertmode;
280 
281   PetscFunctionBegin;
282   /*  wait on receives */
283   while (1) {
284     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
285     if (!flg) break;
286 
287     for (i=0; i<n;) {
288       /* Now identify the consecutive vals belonging to the same row */
289       for (j=i,rstart=row[j]; j<n; j++) {
290         if (row[j] != rstart) break;
291       }
292       if (j < n) ncols = j-i;
293       else       ncols = n-i;
294       /* Now assemble all these values with a single function call */
295       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
296       i    = j;
297     }
298   }
299   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
300 
301   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
302   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
303 
304   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
305     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
306   }
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatZeroEntries_MPIDense"
312 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
313 {
314   PetscErrorCode ierr;
315   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
316 
317   PetscFunctionBegin;
318   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 /* the code does not handle the diagonal entries correctly unless the
323    matrix is square and the column and row ownerships are identical.
324    This is a BUG. The only way to fix it seems to be to access
325    the local storage (mdn->A) directly rather than going through the
326    MatZeroRows() routine.
327 */
328 #undef __FUNCT__
329 #define __FUNCT__ "MatZeroRows_MPIDense"
330 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
331 {
332   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
333   PetscErrorCode    ierr;
334   PetscInt          i,*owners = A->rmap->range;
335   PetscInt          *nprocs,j,idx,nsends;
336   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
337   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
338   PetscInt          *lens,*lrows,*values;
339   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
340   MPI_Comm          comm = ((PetscObject)A)->comm;
341   MPI_Request       *send_waits,*recv_waits;
342   MPI_Status        recv_status,*send_status;
343   PetscBool         found;
344   const PetscScalar *xx;
345   PetscScalar       *bb;
346 
347   PetscFunctionBegin;
348   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
349   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
350   /*  first count number of contributors to each processor */
351   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
352   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
353   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
354   for (i=0; i<N; i++) {
355     idx   = rows[i];
356     found = PETSC_FALSE;
357     for (j=0; j<size; j++) {
358       if (idx >= owners[j] && idx < owners[j+1]) {
359         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
360       }
361     }
362     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
363   }
364   nsends = 0;
365   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
366 
367   /* inform other processors of number of messages and max length*/
368   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
369 
370   /* post receives:   */
371   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
372   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
373   for (i=0; i<nrecvs; i++) {
374     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
375   }
376 
377   /* do sends:
378       1) starts[i] gives the starting index in svalues for stuff going to
379          the ith processor
380   */
381   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
382   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
383   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
384 
385   starts[0] = 0;
386   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
387   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
388 
389   starts[0] = 0;
390   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
391   count = 0;
392   for (i=0; i<size; i++) {
393     if (nprocs[2*i+1]) {
394       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
395     }
396   }
397   ierr = PetscFree(starts);CHKERRQ(ierr);
398 
399   base = owners[rank];
400 
401   /*  wait on receives */
402   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
403   count = nrecvs;
404   slen  = 0;
405   while (count) {
406     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
407     /* unpack receives into our local space */
408     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
409 
410     source[imdex] = recv_status.MPI_SOURCE;
411     lens[imdex]   = n;
412     slen += n;
413     count--;
414   }
415   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
416 
417   /* move the received row indices into a local list */
418   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
419   count = 0;
420   for (i=0; i<nrecvs; i++) {
421     values = rvalues + i*nmax;
422     for (j=0; j<lens[i]; j++) {
423       lrows[count++] = values[j] - base;
424     }
425   }
426   ierr = PetscFree(rvalues);CHKERRQ(ierr);
427   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
428   ierr = PetscFree(owner);CHKERRQ(ierr);
429   ierr = PetscFree(nprocs);CHKERRQ(ierr);
430 
431   /* fix right hand side if needed */
432   if (x && b) {
433     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
434     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
435     for (i=0; i<slen; i++) {
436       bb[lrows[i]] = diag*xx[lrows[i]];
437     }
438     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
439     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
440   }
441 
442   /* actually zap the local rows */
443   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
444   if (diag != 0.0) {
445     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
446     PetscInt     m   = ll->lda, i;
447 
448     for (i=0; i<slen; i++) {
449       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
450     }
451   }
452   ierr = PetscFree(lrows);CHKERRQ(ierr);
453 
454   /* wait on sends */
455   if (nsends) {
456     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
457     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
458     ierr = PetscFree(send_status);CHKERRQ(ierr);
459   }
460   ierr = PetscFree(send_waits);CHKERRQ(ierr);
461   ierr = PetscFree(svalues);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "MatMult_MPIDense"
467 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
468 {
469   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
470   PetscErrorCode ierr;
471 
472   PetscFunctionBegin;
473   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatMultAdd_MPIDense"
481 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
482 {
483   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
484   PetscErrorCode ierr;
485 
486   PetscFunctionBegin;
487   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatMultTranspose_MPIDense"
495 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
496 {
497   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
498   PetscErrorCode ierr;
499   PetscScalar    zero = 0.0;
500 
501   PetscFunctionBegin;
502   ierr = VecSet(yy,zero);CHKERRQ(ierr);
503   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
504   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
511 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
512 {
513   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
514   PetscErrorCode ierr;
515 
516   PetscFunctionBegin;
517   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
518   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
519   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatGetDiagonal_MPIDense"
526 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
527 {
528   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
529   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
530   PetscErrorCode ierr;
531   PetscInt       len,i,n,m = A->rmap->n,radd;
532   PetscScalar    *x,zero = 0.0;
533 
534   PetscFunctionBegin;
535   ierr = VecSet(v,zero);CHKERRQ(ierr);
536   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
537   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
538   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
539   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
540   radd = A->rmap->rstart*m;
541   for (i=0; i<len; i++) {
542     x[i] = aloc->v[radd + i*m + i];
543   }
544   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatDestroy_MPIDense"
550 PetscErrorCode MatDestroy_MPIDense(Mat mat)
551 {
552   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556 #if defined(PETSC_USE_LOG)
557   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
558 #endif
559   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
560   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
561   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
562   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
563 
564   ierr = PetscFree(mat->data);CHKERRQ(ierr);
565   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
568   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
570   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatView_MPIDense_Binary"
576 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
577 {
578   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
579   PetscErrorCode    ierr;
580   PetscViewerFormat format;
581   int               fd;
582   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
583   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
584   PetscScalar       *work,*v,*vv;
585   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
586 
587   PetscFunctionBegin;
588   if (mdn->size == 1) {
589     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
590   } else {
591     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
592     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
593     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
594 
595     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596     if (format == PETSC_VIEWER_NATIVE) {
597 
598       if (!rank) {
599         /* store the matrix as a dense matrix */
600         header[0] = MAT_FILE_CLASSID;
601         header[1] = mat->rmap->N;
602         header[2] = N;
603         header[3] = MATRIX_BINARY_FORMAT_DENSE;
604         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
605 
606         /* get largest work array needed for transposing array */
607         mmax = mat->rmap->n;
608         for (i=1; i<size; i++) {
609           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
610         }
611         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
612 
613         /* write out local array, by rows */
614         m = mat->rmap->n;
615         v = a->v;
616         for (j=0; j<N; j++) {
617           for (i=0; i<m; i++) {
618             work[j + i*N] = *v++;
619           }
620         }
621         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
622         /* get largest work array to receive messages from other processes, excludes process zero */
623         mmax = 0;
624         for (i=1; i<size; i++) {
625           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
626         }
627         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
628         for (k = 1; k < size; k++) {
629           v    = vv;
630           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
631           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
632 
633           for (j = 0; j < N; j++) {
634             for (i = 0; i < m; i++) {
635               work[j + i*N] = *v++;
636             }
637           }
638           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
639         }
640         ierr = PetscFree(work);CHKERRQ(ierr);
641         ierr = PetscFree(vv);CHKERRQ(ierr);
642       } else {
643         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
644       }
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
646   }
647   PetscFunctionReturn(0);
648 }
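/*
   Editor's note: a hedged usage sketch, not part of the original source.  Storing a parallel
   dense matrix in the native binary format, as the error message above requires; "mat" stands
   for an assembled MATMPIDENSE matrix and the file name "dense.dat" is only illustrative.

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/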
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
652 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
653 {
654   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
655   PetscErrorCode    ierr;
656   PetscMPIInt       size = mdn->size,rank = mdn->rank;
657   PetscViewerType   vtype;
658   PetscBool         iascii,isdraw;
659   PetscViewer       sviewer;
660   PetscViewerFormat format;
661 
662   PetscFunctionBegin;
663   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
664   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
665   if (iascii) {
666     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
667     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
668     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
669       MatInfo info;
670       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
671       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
672       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
673       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
674       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
675       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
676       PetscFunctionReturn(0);
677     } else if (format == PETSC_VIEWER_ASCII_INFO) {
678       PetscFunctionReturn(0);
679     }
680   } else if (isdraw) {
681     PetscDraw draw;
682     PetscBool isnull;
683 
684     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
685     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
686     if (isnull) PetscFunctionReturn(0);
687   }
688 
689   if (size == 1) {
690     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
691   } else {
692     /* assemble the entire matrix onto first processor. */
693     Mat         A;
694     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
695     PetscInt    *cols;
696     PetscScalar *vals;
697 
698     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
699     if (!rank) {
700       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
701     } else {
702       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
703     }
704     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
705     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
706     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
707     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
708 
709     /* Copy the matrix ... This isn't the most efficient means,
710        but it's quick for now */
711     A->insertmode = INSERT_VALUES;
712 
713     row = mat->rmap->rstart;
714     m   = mdn->A->rmap->n;
715     for (i=0; i<m; i++) {
716       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
717       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
718       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
719       row++;
720     }
721 
722     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
723     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
724     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
727       /* Set the type name to MATMPIDENSE so that the correct type is printed by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
728       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
729       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
730     }
731     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
733     ierr = MatDestroy(&A);CHKERRQ(ierr);
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatView_MPIDense"
740 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
741 {
742   PetscErrorCode ierr;
743   PetscBool      iascii,isbinary,isdraw,issocket;
744 
745   PetscFunctionBegin;
746   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
747   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
748   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
749   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
750 
751   if (iascii || issocket || isdraw) {
752     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
753   } else if (isbinary) {
754     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
755   }
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatGetInfo_MPIDense"
761 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
762 {
763   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
764   Mat            mdn  = mat->A;
765   PetscErrorCode ierr;
766   PetscReal      isend[5],irecv[5];
767 
768   PetscFunctionBegin;
769   info->block_size = 1.0;
770 
771   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
772 
773   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
774   isend[3] = info->memory;  isend[4] = info->mallocs;
775   if (flag == MAT_LOCAL) {
776     info->nz_used      = isend[0];
777     info->nz_allocated = isend[1];
778     info->nz_unneeded  = isend[2];
779     info->memory       = isend[3];
780     info->mallocs      = isend[4];
781   } else if (flag == MAT_GLOBAL_MAX) {
782     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
783 
784     info->nz_used      = irecv[0];
785     info->nz_allocated = irecv[1];
786     info->nz_unneeded  = irecv[2];
787     info->memory       = irecv[3];
788     info->mallocs      = irecv[4];
789   } else if (flag == MAT_GLOBAL_SUM) {
790     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
791 
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   }
798   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
799   info->fill_ratio_needed = 0;
800   info->factor_mallocs    = 0;
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatSetOption_MPIDense"
806 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
807 {
808   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
809   PetscErrorCode ierr;
810 
811   PetscFunctionBegin;
812   switch (op) {
813   case MAT_NEW_NONZERO_LOCATIONS:
814   case MAT_NEW_NONZERO_LOCATION_ERR:
815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
817     break;
818   case MAT_ROW_ORIENTED:
819     a->roworiented = flg;
820 
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_NEW_DIAGONALS:
824   case MAT_KEEP_NONZERO_PATTERN:
825   case MAT_USE_HASH_TABLE:
826     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
827     break;
828   case MAT_IGNORE_OFF_PROC_ENTRIES:
829     a->donotstash = flg;
830     break;
831   case MAT_SYMMETRIC:
832   case MAT_STRUCTURALLY_SYMMETRIC:
833   case MAT_HERMITIAN:
834   case MAT_SYMMETRY_ETERNAL:
835   case MAT_IGNORE_LOWER_TRIANGULAR:
836     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
837     break;
838   default:
839     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 
845 #undef __FUNCT__
846 #define __FUNCT__ "MatDiagonalScale_MPIDense"
847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
848 {
849   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
850   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
851   PetscScalar    *l,*r,x,*v;
852   PetscErrorCode ierr;
853   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
854 
855   PetscFunctionBegin;
856   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
857   if (ll) {
858     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
859     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
860     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
861     for (i=0; i<m; i++) {
862       x = l[i];
863       v = mat->v + i;
864       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
865     }
866     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
867     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
868   }
869   if (rr) {
870     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
871     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %D != %D.", s3a, s3);
872     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
875     for (i=0; i<n; i++) {
876       x = r[i];
877       v = mat->v + i*m;
878       for (j=0; j<m; j++) (*v++) *= x;
879     }
880     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
881     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
882   }
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatNorm_MPIDense"
888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
889 {
890   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
891   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
892   PetscErrorCode ierr;
893   PetscInt       i,j;
894   PetscReal      sum = 0.0;
895   PetscScalar    *v  = mat->v;
896 
897   PetscFunctionBegin;
898   if (mdn->size == 1) {
899     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
900   } else {
901     if (type == NORM_FROBENIUS) {
902       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
903         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
904       }
905       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
906       *nrm = PetscSqrtReal(*nrm);
907       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
908     } else if (type == NORM_1) {
909       PetscReal *tmp,*tmp2;
910       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
911       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
912       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
913       *nrm = 0.0;
914       v = mat->v;
915       for (j=0; j<mdn->A->cmap->n; j++) {
916         for (i=0; i<mdn->A->rmap->n; i++) {
917           tmp[j] += PetscAbsScalar(*v);  v++;
918         }
919       }
920       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
921       for (j=0; j<A->cmap->N; j++) {
922         if (tmp2[j] > *nrm) *nrm = tmp2[j];
923       }
924       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
925       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
926     } else if (type == NORM_INFINITY) { /* max row norm */
927       PetscReal ntemp;
928       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
929       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
930     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatTranspose_MPIDense"
937 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
938 {
939   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
940   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
941   Mat            B;
942   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
943   PetscErrorCode ierr;
944   PetscInt       j,i;
945   PetscScalar    *v;
946 
947   PetscFunctionBegin;
948   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
949   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
950     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
951     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
952     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
953     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
954   } else {
955     B = *matout;
956   }
957 
958   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
959   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
960   for (i=0; i<m; i++) rwork[i] = rstart + i;
961   for (j=0; j<n; j++) {
962     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
963     v   += m;
964   }
965   ierr = PetscFree(rwork);CHKERRQ(ierr);
966   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
969     *matout = B;
970   } else {
971     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
972   }
973   PetscFunctionReturn(0);
974 }
975 
976 
977 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
978 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatSetUp_MPIDense"
982 PetscErrorCode MatSetUp_MPIDense(Mat A)
983 {
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatAXPY_MPIDense"
993 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
994 {
995   PetscErrorCode ierr;
996   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
997 
998   PetscFunctionBegin;
999   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatConjugate_MPIDense"
1005 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1006 {
1007   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatRealPart_MPIDense"
1017 PetscErrorCode MatRealPart_MPIDense(Mat A)
1018 {
1019   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1020   PetscErrorCode ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1029 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1030 {
1031   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1032   PetscErrorCode ierr;
1033 
1034   PetscFunctionBegin;
1035   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1040 #undef __FUNCT__
1041 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1042 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1043 {
1044   PetscErrorCode ierr;
1045   PetscInt       i,n;
1046   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1047   PetscReal      *work;
1048 
1049   PetscFunctionBegin;
1050   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
1051   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1052   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1053   if (type == NORM_2) {
1054     for (i=0; i<n; i++) work[i] *= work[i];
1055   }
1056   if (type == NORM_INFINITY) {
1057     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1058   } else {
1059     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1060   }
1061   ierr = PetscFree(work);CHKERRQ(ierr);
1062   if (type == NORM_2) {
1063     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatSetRandom_MPIDense"
1070 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1071 {
1072   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1073   PetscErrorCode ierr;
1074   PetscScalar    *a;
1075   PetscInt       m,n,i;
1076 
1077   PetscFunctionBegin;
1078   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1079   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1080   for (i=0; i<m*n; i++) {
1081     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1082   }
1083   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 /* -------------------------------------------------------------------*/
1088 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1089                                         MatGetRow_MPIDense,
1090                                         MatRestoreRow_MPIDense,
1091                                         MatMult_MPIDense,
1092                                 /*  4*/ MatMultAdd_MPIDense,
1093                                         MatMultTranspose_MPIDense,
1094                                         MatMultTransposeAdd_MPIDense,
1095                                         0,
1096                                         0,
1097                                         0,
1098                                 /* 10*/ 0,
1099                                         0,
1100                                         0,
1101                                         0,
1102                                         MatTranspose_MPIDense,
1103                                 /* 15*/ MatGetInfo_MPIDense,
1104                                         MatEqual_MPIDense,
1105                                         MatGetDiagonal_MPIDense,
1106                                         MatDiagonalScale_MPIDense,
1107                                         MatNorm_MPIDense,
1108                                 /* 20*/ MatAssemblyBegin_MPIDense,
1109                                         MatAssemblyEnd_MPIDense,
1110                                         MatSetOption_MPIDense,
1111                                         MatZeroEntries_MPIDense,
1112                                 /* 24*/ MatZeroRows_MPIDense,
1113                                         0,
1114                                         0,
1115                                         0,
1116                                         0,
1117                                 /* 29*/ MatSetUp_MPIDense,
1118                                         0,
1119                                         0,
1120                                         0,
1121                                         0,
1122                                 /* 34*/ MatDuplicate_MPIDense,
1123                                         0,
1124                                         0,
1125                                         0,
1126                                         0,
1127                                 /* 39*/ MatAXPY_MPIDense,
1128                                         MatGetSubMatrices_MPIDense,
1129                                         0,
1130                                         MatGetValues_MPIDense,
1131                                         0,
1132                                 /* 44*/ 0,
1133                                         MatScale_MPIDense,
1134                                         0,
1135                                         0,
1136                                         0,
1137                                 /* 49*/ MatSetRandom_MPIDense,
1138                                         0,
1139                                         0,
1140                                         0,
1141                                         0,
1142                                 /* 54*/ 0,
1143                                         0,
1144                                         0,
1145                                         0,
1146                                         0,
1147                                 /* 59*/ MatGetSubMatrix_MPIDense,
1148                                         MatDestroy_MPIDense,
1149                                         MatView_MPIDense,
1150                                         0,
1151                                         0,
1152                                 /* 64*/ 0,
1153                                         0,
1154                                         0,
1155                                         0,
1156                                         0,
1157                                 /* 69*/ 0,
1158                                         0,
1159                                         0,
1160                                         0,
1161                                         0,
1162                                 /* 74*/ 0,
1163                                         0,
1164                                         0,
1165                                         0,
1166                                         0,
1167                                 /* 79*/ 0,
1168                                         0,
1169                                         0,
1170                                         0,
1171                                 /* 83*/ MatLoad_MPIDense,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                         0,
1176                                         0,
1177                                 /* 89*/
1178                                         0,
1179                                         0,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                 /* 94*/ 0,
1184                                         0,
1185                                         0,
1186                                         0,
1187                                         0,
1188                                 /* 99*/ 0,
1189                                         0,
1190                                         0,
1191                                         MatConjugate_MPIDense,
1192                                         0,
1193                                 /*104*/ 0,
1194                                         MatRealPart_MPIDense,
1195                                         MatImaginaryPart_MPIDense,
1196                                         0,
1197                                         0,
1198                                 /*109*/ 0,
1199                                         0,
1200                                         0,
1201                                         0,
1202                                         0,
1203                                 /*114*/ 0,
1204                                         0,
1205                                         0,
1206                                         0,
1207                                         0,
1208                                 /*119*/ 0,
1209                                         0,
1210                                         0,
1211                                         0,
1212                                         0,
1213                                 /*124*/ 0,
1214                                         MatGetColumnNorms_MPIDense
1215 };
1216 
1217 EXTERN_C_BEGIN
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1220 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1221 {
1222   Mat_MPIDense   *a;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   mat->preallocated = PETSC_TRUE;
1227   /* Note:  For now, when data is provided by the caller, this assumes the user has correctly
1228    allocated the local dense storage space.  We should add error checking. */
1229 
1230   a       = (Mat_MPIDense*)mat->data;
1231   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1232   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1233   a->nvec = mat->cmap->n;
1234 
1235   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1236   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1237   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1238   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1239   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 EXTERN_C_END
1243 
1244 EXTERN_C_BEGIN
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatCreate_MPIDense"
1247 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1248 {
1249   Mat_MPIDense   *a;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1254   mat->data = (void*)a;
1255   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1256 
1257   mat->insertmode = NOT_SET_VALUES;
1258   ierr            = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1259   ierr            = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1260 
1261   /* build cache for off-processor entries */
1262   a->donotstash = PETSC_FALSE;
1263 
1264   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1265 
1266   /* data structures used for matrix-vector multiply */
1267   a->lvec        = 0;
1268   a->Mvctx       = 0;
1269   a->roworiented = PETSC_TRUE;
1270 
1271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1273 
1274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1275                                            "MatGetDiagonalBlock_MPIDense",
1276                                            MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1278                                            "MatMPIDenseSetPreallocation_MPIDense",
1279                                            MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1281                                            "MatMatMult_MPIAIJ_MPIDense",
1282                                            MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1284                                            "MatMatMultSymbolic_MPIAIJ_MPIDense",
1285                                            MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1287                                            "MatMatMultNumeric_MPIAIJ_MPIDense",
1288                                            MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1289   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 EXTERN_C_END
1293 
1294 /*MC
1295    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1296 
1297    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1298    and MATMPIDENSE otherwise.
1299 
1300    Options Database Keys:
1301 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1302 
1303   Level: beginner
1304 
1305 
1306 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1307 M*/
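/*
   Editor's note: a hedged usage sketch, not part of the original source.  Selecting the "dense"
   type from the options database (for example with -mat_type dense on the command line); M and N
   stand for the desired global sizes.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);
*/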
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1311 /*@C
1312    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1313 
1314    Not collective
1315 
1316    Input Parameters:
1317 +  A - the matrix
1318 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1319    to control all matrix memory allocation.
1320 
1321    Notes:
1322    The dense format is fully compatible with standard Fortran 77
1323    storage by columns.
1324 
1325    The data input variable is intended primarily for Fortran programmers
1326    who wish to allocate their own matrix memory space.  Most users should
1327    set data=PETSC_NULL.
1328 
1329    Level: intermediate
1330 
1331 .keywords: matrix,dense, parallel
1332 
1333 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1334 @*/
1335 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1336 {
1337   PetscErrorCode ierr;
1338 
1339   PetscFunctionBegin;
1340   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
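/*
   Editor's note: a hedged usage sketch, not part of the original source.  Supplying the local
   column-major storage (m local rows by N global columns) instead of letting PETSc allocate it;
   m, N and localdata are illustrative names.

     Mat         A;
     PetscScalar *localdata;

     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&localdata);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,localdata);CHKERRQ(ierr);
*/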
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCreateDense"
1346 /*@C
1347    MatCreateDense - Creates a parallel matrix in dense format.
1348 
1349    Collective on MPI_Comm
1350 
1351    Input Parameters:
1352 +  comm - MPI communicator
1353 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1354 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1355 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1356 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1357 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1358    to control all matrix memory allocation.
1359 
1360    Output Parameter:
1361 .  A - the matrix
1362 
1363    Notes:
1364    The dense format is fully compatible with standard Fortran 77
1365    storage by columns.
1366 
1367    The data input variable is intended primarily for Fortran programmers
1368    who wish to allocate their own matrix memory space.  Most users should
1369    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1370 
1371    The user MUST specify either the local or global matrix dimensions
1372    (possibly both).
1373 
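   A minimal usage sketch (illustrative only; M and N are placeholder global sizes and error
   checking is omitted), letting PETSc pick the local sizes and manage the storage:
.vb
      Mat A;
      MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
      /* ... set entries with MatSetValues() and assemble ... */
      MatDestroy(&A);
.ve
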
1374    Level: intermediate
1375 
1376 .keywords: matrix, dense, parallel
1377 
1378 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1379 @*/
1380 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1381 {
1382   PetscErrorCode ierr;
1383   PetscMPIInt    size;
1384 
1385   PetscFunctionBegin;
1386   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1387   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1388   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1389   if (size > 1) {
1390     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1391     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1392   } else {
1393     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1394     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatDuplicate_MPIDense"
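/*
   Creates a new matrix with the same parallel layout and type as A, copies the function table
   and state flags, references A's row and column layouts, and duplicates the local dense block
   (values are copied only when cpvalues requests it).
*/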
1401 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1402 {
1403   Mat            mat;
1404   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   *newmat = 0;
1409   ierr    = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1410   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1411   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1412   a       = (Mat_MPIDense*)mat->data;
1413   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1414 
1415   mat->factortype   = A->factortype;
1416   mat->assembled    = PETSC_TRUE;
1417   mat->preallocated = PETSC_TRUE;
1418 
1419   a->size         = oldmat->size;
1420   a->rank         = oldmat->rank;
1421   mat->insertmode = NOT_SET_VALUES;
1422   a->nvec         = oldmat->nvec;
1423   a->donotstash   = oldmat->donotstash;
1424 
1425   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1426   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1427 
1428   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1429   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1430   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1431 
1432   *newmat = mat;
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
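/*
   Loads a matrix stored in the binary file in dense format: process 0 reads each process's
   block of rows (row by row, as stored in the file) and ships it out; every process then
   copies its block into the column-major local array.
*/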
1438 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1439 {
1440   PetscErrorCode ierr;
1441   PetscMPIInt    rank,size;
1442   PetscInt       *rowners,i,m,nz,j;
1443   PetscScalar    *array,*vals,*vals_ptr;
1444 
1445   PetscFunctionBegin;
1446   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1447   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1448 
1449   /* determine ownership of all rows */
1450   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1451   else m = newmat->rmap->n;
1452   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1453   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1454   rowners[0] = 0;
1455   for (i=2; i<=size; i++) {
1456     rowners[i] += rowners[i-1];
1457   }
1458 
1459   if (!sizesset) {
1460     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1461   }
1462   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1463   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1464 
1465   if (!rank) {
1466     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1467 
1468     /* read in my part of the matrix numerical values  */
1469     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1470 
1471     /* insert into matrix by row (this is why we cannot read directly into the array) */
1472     vals_ptr = vals;
1473     for (i=0; i<m; i++) {
1474       for (j=0; j<N; j++) {
1475         array[i + j*m] = *vals_ptr++;
1476       }
1477     }
1478 
1479     /* read in the other processes' parts and ship them out */
1480     for (i=1; i<size; i++) {
1481       nz   = (rowners[i+1] - rowners[i])*N;
1482       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1483       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1484     }
1485   } else {
1486     /* receive numeric values */
1487     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1488 
1489     /* receive message of values */
1490     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1491 
1492     /* insert into matrix by row (this is why we cannot read directly into the array) */
1493     vals_ptr = vals;
1494     for (i=0; i<m; i++) {
1495       for (j=0; j<N; j++) {
1496         array[i + j*m] = *vals_ptr++;
1497       }
1498     }
1499   }
1500   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1501   ierr = PetscFree(rowners);CHKERRQ(ierr);
1502   ierr = PetscFree(vals);CHKERRQ(ierr);
1503   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatLoad_MPIDense"
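/*
   Loads a matrix from a PETSc binary viewer. The file may hold either a dense dump (delegated
   to MatLoad_MPIDense_DenseInFile() above) or a sparse AIJ-style dump, in which case process 0
   reads and distributes the row lengths, column indices and values, and each process inserts
   its rows with MatSetValues().
*/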
1510 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1511 {
1512   PetscScalar    *vals,*svals;
1513   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1514   MPI_Status     status;
1515   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1516   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1517   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1518   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1519   int            fd;
1520   PetscErrorCode ierr;
1521 
1522   PetscFunctionBegin;
1523   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1524   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1525   if (!rank) {
1526     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1527     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1528     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1529   }
1530   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1531 
1532   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1533   M    = header[1]; N = header[2]; nz = header[3];
1534 
1535   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1536   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1537   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1538 
1539   /* If global sizes are set, check if they are consistent with that given in the file */
1540   if (sizesset) {
1541     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1542   }
1543   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
1544   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
1545 
1546   /*
1547        Handle case where matrix is stored on disk as a dense matrix
1548   */
1549   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1550     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1551     PetscFunctionReturn(0);
1552   }
1553 
1554   /* determine ownership of all rows */
1555   if (newmat->rmap->n < 0) {
1556     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1557   } else {
1558     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1559   }
1560   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1561   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1562   rowners[0] = 0;
1563   for (i=2; i<=size; i++) {
1564     rowners[i] += rowners[i-1];
1565   }
1566   rstart = rowners[rank];
1567   rend   = rowners[rank+1];
1568 
1569   /* distribute row lengths to all processors */
1570   ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1571   if (!rank) {
1572     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1573     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1574     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1575     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1576     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1577     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1578   } else {
1579     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1580   }
1581 
1582   if (!rank) {
1583     /* calculate the number of nonzeros on each processor */
1584     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1585     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1586     for (i=0; i<size; i++) {
1587       for (j=rowners[i]; j< rowners[i+1]; j++) {
1588         procsnz[i] += rowlengths[j];
1589       }
1590     }
1591     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1592 
1593     /* determine max buffer needed and allocate it */
1594     maxnz = 0;
1595     for (i=0; i<size; i++) {
1596       maxnz = PetscMax(maxnz,procsnz[i]);
1597     }
1598     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1599 
1600     /* read in my part of the matrix column indices  */
1601     nz = procsnz[0];
1602     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1603     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1604 
1605     /* read in everyone else's and ship off */
1606     for (i=1; i<size; i++) {
1607       nz   = procsnz[i];
1608       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1609       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1610     }
1611     ierr = PetscFree(cols);CHKERRQ(ierr);
1612   } else {
1613     /* determine buffer space needed for message */
1614     nz = 0;
1615     for (i=0; i<m; i++) {
1616       nz += ourlens[i];
1617     }
1618     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1619 
1620     /* receive message of column indices*/
1621     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1622     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1623     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1624   }
1625 
1626   /* loop over local rows, determining number of off diagonal entries */
1627   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1628   jj   = 0;
1629   for (i=0; i<m; i++) {
1630     for (j=0; j<ourlens[i]; j++) {
1631       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1632       jj++;
1633     }
1634   }
1635 
1636   /* create our matrix: temporarily shrink ourlens to the on-process (diagonal block) counts, preallocate, then restore the full row lengths */
1637   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1638 
1639   if (!sizesset) {
1640     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1641   }
1642   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1643   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1644 
1645   if (!rank) {
1646     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1647 
1648     /* read in my part of the matrix numerical values  */
1649     nz   = procsnz[0];
1650     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1651 
1652     /* insert into matrix */
1653     jj      = rstart;
1654     smycols = mycols;
1655     svals   = vals;
1656     for (i=0; i<m; i++) {
1657       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1658       smycols += ourlens[i];
1659       svals   += ourlens[i];
1660       jj++;
1661     }
1662 
1663     /* read in the other processes' parts and ship them out */
1664     for (i=1; i<size; i++) {
1665       nz   = procsnz[i];
1666       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1667       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1668     }
1669     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1670   } else {
1671     /* receive numeric values */
1672     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1673 
1674     /* receive message of values */
1675     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1676     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1677     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1678 
1679     /* insert into matrix */
1680     jj      = rstart;
1681     smycols = mycols;
1682     svals   = vals;
1683     for (i=0; i<m; i++) {
1684       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1685       smycols += ourlens[i];
1686       svals   += ourlens[i];
1687       jj++;
1688     }
1689   }
1690   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1691   ierr = PetscFree(vals);CHKERRQ(ierr);
1692   ierr = PetscFree(mycols);CHKERRQ(ierr);
1693   ierr = PetscFree(rowners);CHKERRQ(ierr);
1694 
1695   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatEqual_MPIDense"
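/*
   Two parallel dense matrices are equal when every process's local block is equal; the
   per-process results are combined with a logical AND reduction.
*/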
1702 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1703 {
1704   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1705   Mat            a,b;
1706   PetscBool      flg;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   a    = matA->A;
1711   b    = matB->A;
1712   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1713   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
1716 
1717