
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
              matrix that represents the locally owned part of the operator. For a sequential matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

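    Example usage, a minimal sketch (assumes A has already been created and assembled):
.vb
      Mat A,Aloc;
      MatDenseGetLocalMatrix(A,&Aloc);
      /* Aloc is the inner MATSEQDENSE matrix; it is returned without an added
         reference, so the caller must not destroy it */
.ve
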
    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
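    /* note: B aliases columns rstart..rstart+m-1 of the local array rather than
       copying them; this assumes the inner SeqDense uses its default lda == m */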
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
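      /* off-process entries are cached in the stash and routed to their owners
         later, in MatAssemblyBegin_MPIDense()/MatAssemblyEnd_MPIDense() */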
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK ... currently the submatrix must use the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processes use the same insert mode (all INSERT_VALUES or all ADD_VALUES) */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
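  /* nprocs[2*j] counts the rows destined for process j; nprocs[2*j+1] is a 0/1
     flag recording whether any message is sent to process j at all */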
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
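  /* the local block is m x N (each process stores its rows in full), so the
     entire input vector is first gathered into the local work vector lvec */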
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
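  /* column-major storage: aloc->v[radd + i*m + i] = v[(rstart+i)*m + i] is the
     local (i, rstart+i) entry, i.e. the i-th locally owned diagonal entry */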
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
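        /* the copy below transposes the column-major local array into the
           row-major row-by-row ordering used in the binary file */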
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
      PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
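  /* local column j of A holds entries (rstart..rstart+m-1, j); inserting it as
     global row j of B, with columns rwork = rstart..rstart+m-1, transposes A */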
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIDense"
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIDense"
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIDense"
PetscErrorCode  MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIDense"
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIDense"
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIDense"
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is provided, we assume the user has correctly
     allocated the local dense storage space; error checking should be added */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

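   Example usage (a minimal sketch; the sizes m,n,M,N are assumed to be set by the caller):
.vb
      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATDENSE);   /* or select it with -mat_type dense and MatSetFromOptions() */
      MatSetUp(A);
.ve
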
  Level: beginner


.seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
M*/

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

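   Example usage (a minimal sketch with PETSc-managed storage; m,n,M,N assumed set):
.vb
      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,NULL);  /* NULL lets PETSc allocate the local array */
.ve
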
   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1364 -  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
1365    to control all matrix memory allocation.
1366 
1367    Output Parameter:
1368 .  A - the matrix
1369 
1370    Notes:
1371    The dense format is fully compatible with standard Fortran 77
1372    storage by columns.
1373 
1374    The data input variable is intended primarily for Fortran programmers
1375    who wish to allocate their own matrix memory space.  Most users should
1376    set data=NULL (NULL_SCALAR for Fortran users).
1377 
1378    The user MUST specify either the local or global matrix dimensions
1379    (possibly both).
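
   Example (a minimal sketch, not taken from this file; comm is any MPI communicator and the
   100x100 global sizes are arbitrary):
$     Mat A;
$     MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A);
$     /* ... fill with MatSetValues() ... */
$     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
$     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);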

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
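  /* copy the full function table so the duplicate dispatches exactly like the original */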
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
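  /* default split: M/size rows per process, with the first M%size ranks taking one extra row */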
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
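  /* turn the gathered per-rank counts into ownership offsets: rank r owns rows rowners[r]..rowners[r+1]-1 */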
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row: the file stores values by rows, while the local
       dense array is column major, which is why we cannot read directly into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the parts owned by the other processes and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
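      /* MPIULong_Send()/MPIULong_Recv() are PETSc helpers that chunk messages which may
         exceed the int count limit of a plain MPI_Send()/MPI_Recv() */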
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix row by row: the file stores values by rows, while the local
       dense array is column major, which is why we cannot read directly into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If the global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /*
       Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine the maximum buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's parts and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine the buffer space needed for the message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
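  /* a column index outside the locally owned range [rstart,rend) counts as off-diagonal */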
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) ourlens[i] -= offlens[i];

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) ourlens[i] += offlens[i];

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
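  /* the matrices are equal only if every process finds its local block equal */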
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1725