xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 445d14fee1acd31685d8e5d04c46e6b09e28ef19)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatDenseGetLocalMatrix"
12 /*@
13 
14       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
15               matrix that represents the operator. For sequential matrices it returns itself.
16 
17     Input Parameter:
18 .      A - the Seq or MPI dense matrix
19 
20     Output Parameter:
21 .      B - the inner matrix
22 
23     Level: intermediate
24 
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
27 {
28   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
29   PetscErrorCode ierr;
30   PetscBool      flg;
31 
32   PetscFunctionBegin;
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRow_MPIDense"
41 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
42 {
43   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
44   PetscErrorCode ierr;
45   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
46 
47   PetscFunctionBegin;
48   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
49   lrow = row - rstart;
50   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatRestoreRow_MPIDense"
56 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57 {
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
62   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
63   PetscFunctionReturn(0);
64 }
65 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   Returns in *a a sequential dense matrix that aliases (does not copy) the
   diagonal block of the local storage of A.  The block is created on first
   use and cached on A under the name "DiagonalBlock"; *a is a borrowed
   reference owned by that composition, so the caller must not destroy it.
   Requires A to be globally square.
*/
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* point B at columns rstart..rstart+m-1 of the local array; the m*rstart
       offset assumes the leading dimension of the inner block equals m --
       NOTE(review): confirm lda == m for the sequential block */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* the composition takes its own reference; drop ours so A is the sole owner */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
97 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/*
   Inserts/adds the logically dense block v (m rows idxm[] by n columns idxn[])
   into the matrix.  Locally owned rows go straight into the inner sequential
   block; off-process rows are queued in the stash and delivered during
   assembly (unless stashing is disabled).  Negative indices are silently
   skipped, matching the MatSetValues contract.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number in the sequential block */
      if (roworiented) {
        /* v stored by rows: row i of the block starts at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v stored by columns: entry (i,j) of the block is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it for MatAssemblyBegin/End to communicate */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
133 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
/*
   Retrieves the m-by-n block of entries with rows idxm[] and columns idxn[]
   into v (row-major: entry (i,j) goes to v[i*n+j]).  Only locally owned rows
   may be requested; negative indices are skipped, leaving the corresponding
   slots of v untouched.
*/
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number in the sequential block */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "MatDenseGetArray_MPIDense"
160 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
161 {
162   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
163   PetscErrorCode ierr;
164 
165   PetscFunctionBegin;
166   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
/*
   Extracts the submatrix of A with rows isrow and columns iscol into *B.
   The column index set is gathered onto every process so each rank can copy
   its full set of local rows.  Only submatrices with the same row
   partitioning as A are supported (no parallel redistribution); see the
   in-body note.  With MAT_REUSE_MATRIX, *B is overwritten in place.
*/
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     bv walks the destination contiguously, which assumes the new local block
     is stored with lda == nrows -- NOTE(review): confirm for reused matrices */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    /* column icol[i] of the (column-major) source, offset by its lda */
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      /* irow[] holds global row numbers; shift to local with rstart */
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
236 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
237 {
238   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
239   PetscErrorCode ierr;
240 
241   PetscFunctionBegin;
242   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
/*
   Starts assembly: checks that all ranks agree on the insert mode and begins
   scattering stashed off-process entries to their owning ranks.  Collective.
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE:
     the bitwise OR over ranks equals ADD_VALUES|INSERT_VALUES exactly when
     the two modes were mixed */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
268 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   Finishes assembly: drains the stash messages started in MatAssemblyBegin,
   inserts the received (row,col,val) triples locally, then assembles the
   inner sequential block.  On the first final assembly the scatter context
   needed by MatMult is built.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    /* flg == 0 means no more stash messages are pending */
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up the vector scatter used by MatMult et al. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatZeroEntries_MPIDense"
310 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
311 {
312   PetscErrorCode ierr;
313   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
314 
315   PetscFunctionBegin;
316   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row owerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/*
   Zeroes the N global rows listed in rows[] (any rank may list any row) and
   optionally places diag on the diagonal of each zeroed row.  If x and b are
   given, b is fixed up so the zeroed equations read diag*x = b.  The rows
   each rank requests are routed to their owning ranks with a hand-rolled
   Irecv/Isend exchange before the local zeroing is done.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor:
      sizes[2*j] counts rows destined for rank j, sizes[2*j+1] is a 0/1 flag
      marking whether a message to rank j is needed at all */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:  one buffer slot of nmax entries per expected message */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  /* bucket the requested rows by destination rank (starts[] is consumed) */
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* rebuild starts[] since the bucketing pass above advanced it */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter: collect all received global row
     numbers, shifted to local numbering via base */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed: zeroed equations become diag*x = b */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    /* write the diagonal entries directly into the column-major local array;
       see the BUG note above this function about row/column ownership */
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues may be freed safely */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatMult_MPIDense"
465 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
466 {
467   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
468   PetscErrorCode ierr;
469 
470   PetscFunctionBegin;
471   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
473   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatMultAdd_MPIDense"
479 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
480 {
481   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
482   PetscErrorCode ierr;
483 
484   PetscFunctionBegin;
485   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
486   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
487   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatMultTranspose_MPIDense"
493 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
494 {
495   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
496   PetscErrorCode ierr;
497   PetscScalar    zero = 0.0;
498 
499   PetscFunctionBegin;
500   ierr = VecSet(yy,zero);CHKERRQ(ierr);
501   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
502   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
503   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
504   PetscFunctionReturn(0);
505 }
506 
507 #undef __FUNCT__
508 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
509 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
510 {
511   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
512   PetscErrorCode ierr;
513 
514   PetscFunctionBegin;
515   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
516   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
517   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
518   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/*
   Extracts the diagonal of A into v.  v must have global size equal to the
   number of rows of A; entries outside the local diagonal block are left at
   zero.  The direct array indexing assumes the local block is column-major
   with leading dimension m = A->rmap->n -- NOTE(review): confirm lda == m
   for the inner sequential block.
*/
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  /* radd skips the rstart columns preceding this rank's diagonal block */
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
545 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/*
   Destroys the parallel dense matrix: frees the stash, the inner sequential
   block, the multiply work vector and scatter, then the data structure itself,
   and finally unregisters every method composed on the object at creation.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* release owned sub-objects before freeing mat->data, which holds them */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* clear all composed methods so the object can be retyped cleanly */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
580 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/*
   Writes the matrix to a binary viewer.  Requires PETSC_VIEWER_NATIVE format.
   Rank 0 writes the header, transposes its own (column-major) local block
   into a row-major work buffer and writes it, then receives each remaining
   rank's block in turn and does the same; all other ranks just send their
   local array to rank 0.
*/
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* single process: the sequential viewer handles everything */
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows: transpose the column-major data
           into work so work[j + i*N] holds entry (i,j) */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          /* receive rank k's column-major block and transpose it for output */
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks ship their local array to rank 0 */
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}
656 
657 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
658 #include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
/*
   Views the matrix on an ASCII, draw, or socket viewer.  Info formats print
   per-rank statistics and return early.  Otherwise the whole matrix is
   gathered onto rank 0 (as a temporary MPIDense matrix with all rows on
   rank 0) and viewed there through the sequential dense viewer.
*/
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-rank local sizes and storage info, then stop */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns every row of the temporary; other ranks own none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 views its (complete) local block through a subviewer */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
        ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPIDense"
744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745 {
746   PetscErrorCode ierr;
747   PetscBool      iascii,isbinary,isdraw,issocket;
748 
749   PetscFunctionBegin;
750   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
753   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
754 
755   if (iascii || issocket || isdraw) {
756     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
757   } else if (isbinary) {
758     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 #undef __FUNCT__
764 #define __FUNCT__ "MatGetInfo_MPIDense"
765 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
766 {
767   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
768   Mat            mdn  = mat->A;
769   PetscErrorCode ierr;
770   PetscReal      isend[5],irecv[5];
771 
772   PetscFunctionBegin;
773   info->block_size = 1.0;
774 
775   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
776 
777   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
778   isend[3] = info->memory;  isend[4] = info->mallocs;
779   if (flag == MAT_LOCAL) {
780     info->nz_used      = isend[0];
781     info->nz_allocated = isend[1];
782     info->nz_unneeded  = isend[2];
783     info->memory       = isend[3];
784     info->mallocs      = isend[4];
785   } else if (flag == MAT_GLOBAL_MAX) {
786     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
787 
788     info->nz_used      = irecv[0];
789     info->nz_allocated = irecv[1];
790     info->nz_unneeded  = irecv[2];
791     info->memory       = irecv[3];
792     info->mallocs      = irecv[4];
793   } else if (flag == MAT_GLOBAL_SUM) {
794     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
795 
796     info->nz_used      = irecv[0];
797     info->nz_allocated = irecv[1];
798     info->nz_unneeded  = irecv[2];
799     info->memory       = irecv[3];
800     info->mallocs      = irecv[4];
801   }
802   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
803   info->fill_ratio_needed = 0;
804   info->factor_mallocs    = 0;
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSetOption_MPIDense"
810 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
811 {
812   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816   switch (op) {
817   case MAT_NEW_NONZERO_LOCATIONS:
818   case MAT_NEW_NONZERO_LOCATION_ERR:
819   case MAT_NEW_NONZERO_ALLOCATION_ERR:
820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
821     break;
822   case MAT_ROW_ORIENTED:
823     a->roworiented = flg;
824 
825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
826     break;
827   case MAT_NEW_DIAGONALS:
828   case MAT_KEEP_NONZERO_PATTERN:
829   case MAT_USE_HASH_TABLE:
830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
831     break;
832   case MAT_IGNORE_OFF_PROC_ENTRIES:
833     a->donotstash = flg;
834     break;
835   case MAT_SYMMETRIC:
836   case MAT_STRUCTURALLY_SYMMETRIC:
837   case MAT_HERMITIAN:
838   case MAT_SYMMETRY_ETERNAL:
839   case MAT_IGNORE_LOWER_TRIANGULAR:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
/*
   MatDiagonalScale_MPIDense - Computes A = diag(ll) * A * diag(rr) in place.

   The local array mat->v is stored column-major with leading dimension m
   (see the stride-m walk below), so left scaling steps through a row with
   stride m while right scaling walks a contiguous column.  The right-hand
   vector rr is first scattered into mdn->lvec so each process holds the
   entries matching its full-width local columns.  Either ll or rr may be
   NULL to skip that side of the scaling.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      /* scale row i: consecutive row entries are m apart in column-major storage */
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    /* gather the needed entries of rr into the local work vector */
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      /* scale column i: a column is contiguous, starting at offset i*m */
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
889 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
/*
   MatNorm_MPIDense - Computes a norm of a parallel dense matrix.

   On a single process the work is delegated to the sequential inner matrix.
   Otherwise:
   - NORM_FROBENIUS: sum |a_ij|^2 over the locally owned entries, add the
     partial sums across processes, take the square root.
   - NORM_1 (max column sum): accumulate per-column absolute sums over the
     local rows (column-major walk of mat->v), MPI_SUM the N column sums,
     then take the maximum.
   - NORM_INFINITY (max row sum): local max row sum, then a global MPI_MAX.
   NORM_2 is not supported.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* all locally stored entries, traversed linearly */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      /* tmp: local per-column sums; tmp2: global per-column sums */
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
938 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
/*
   MatTranspose_MPIDense - Forms B = A^T.

   The inner sequential matrix stores all N global columns for the locally
   owned rows (so n below equals the global column count), which lets each
   local column j be inserted as global row j of the transpose via
   MatSetValues(), with column indices rstart..rstart+m-1.  In-place reuse
   (A == *matout) is only supported for square matrices and is completed
   with MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    /* build a fresh matrix with swapped row/column layouts */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  /* rwork[i] = global index of local row i; these become column indices of B */
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of A (contiguous in column-major storage) is row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place transpose: move B's data into A and discard the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
979 
980 
981 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
982 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatSetUp_MPIDense"
986 PetscErrorCode MatSetUp_MPIDense(Mat A)
987 {
988   PetscErrorCode ierr;
989 
990   PetscFunctionBegin;
991   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "MatAXPY_MPIDense"
997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998 {
999   PetscErrorCode ierr;
1000   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1001 
1002   PetscFunctionBegin;
1003   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1004   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatConjugate_MPIDense"
1010 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1011 {
1012   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1013   PetscErrorCode ierr;
1014 
1015   PetscFunctionBegin;
1016   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatRealPart_MPIDense"
1022 PetscErrorCode MatRealPart_MPIDense(Mat A)
1023 {
1024   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1025   PetscErrorCode ierr;
1026 
1027   PetscFunctionBegin;
1028   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1034 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1035 {
1036   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1037   PetscErrorCode ierr;
1038 
1039   PetscFunctionBegin;
1040   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1047 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1048 {
1049   PetscErrorCode ierr;
1050   PetscInt       i,n;
1051   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1052   PetscReal      *work;
1053 
1054   PetscFunctionBegin;
1055   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1056   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1057   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1058   if (type == NORM_2) {
1059     for (i=0; i<n; i++) work[i] *= work[i];
1060   }
1061   if (type == NORM_INFINITY) {
1062     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1063   } else {
1064     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1065   }
1066   ierr = PetscFree(work);CHKERRQ(ierr);
1067   if (type == NORM_2) {
1068     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1069   }
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatSetRandom_MPIDense"
1075 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1076 {
1077   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1078   PetscErrorCode ierr;
1079   PetscScalar    *a;
1080   PetscInt       m,n,i;
1081 
1082   PetscFunctionBegin;
1083   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1084   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1085   for (i=0; i<m*n; i++) {
1086     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1087   }
1088   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 extern PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1093 
#undef __FUNCT__
#define __FUNCT__ "MatMissingDiagonal_MPIDense"
/* A dense matrix explicitly stores every entry, so a diagonal entry can
   never be missing; d is intentionally left unset since *missing is
   always PETSC_FALSE. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1102 
/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE.  Entries are positional: the slot order
   must match struct _MatOps; the /\* nn *\/ markers give the slot index and
   a 0 marks an unimplemented operation. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/
                                        0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1251 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1252 {
1253   Mat_MPIDense   *a;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257   mat->preallocated = PETSC_TRUE;
1258   /* Note:  For now, when data is specified above, this assumes the user correctly
1259    allocates the local dense storage space.  We should add error checking. */
1260 
1261   a       = (Mat_MPIDense*)mat->data;
1262   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1263   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1264   a->nvec = mat->cmap->n;
1265 
1266   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1267   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1268   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1269   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1270   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1271   PetscFunctionReturn(0);
1272 }
1273 
#if defined(PETSC_HAVE_ELEMENTAL)
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MPIDense_Elemental"
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to MATELEMENTAL.

   The local column-major array is first transposed into a row-wise buffer
   (v_rowwise[i*N+j] = array[j*m+i]) because MatSetValues_Elemental expects
   row-wise data; the values are then inserted with ADD_VALUES, which is the
   only mode the interface supports for off-process entries.  For
   MAT_REUSE_MATRIX the new matrix replaces A in place via MatHeaderReplace().
*/
PETSC_EXTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *array,*v_rowwise;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,j,k,*rows,*cols;

  PetscFunctionBegin;
  ierr = PetscMalloc3(m*N,&v_rowwise,m,&rows,N,&cols);CHKERRQ(ierr);
  ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
  /* convert column-wise array into row-wise v_rowwise, see MatSetValues_Elemental() */
  k = 0;
  for (j=0; j<N; j++) {
    cols[j] = j;
    for (i=0; i<m; i++) {
      v_rowwise[i*N+j] = array[k++];
    }
  }
  /* global indices of the locally owned rows */
  for (i=0; i<m; i++) {
    rows[i] = rstart + i;
  }
  ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);

  ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
  ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v_rowwise,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree3(v_rowwise,rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1319 
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Constructor registered for the MATMPIDENSE type.

   Allocates the Mat_MPIDense context, installs the operation table, creates
   the stash used to hold off-process entries set with MatSetValues(), and
   composes the type-specific methods (dense-array access, preallocation,
   Elemental conversion, and the mpiaij x mpidense matrix-product kernels)
   on the object.  The inner sequential matrix is created later, in
   MatMPIDenseSetPreallocation_MPIDense().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1363 
1364 /*MC
1365    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1366 
1367    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1368    and MATMPIDENSE otherwise.
1369 
1370    Options Database Keys:
1371 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1372 
1373   Level: beginner
1374 
1375 
.seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1377 M*/
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1381 /*@C
1382    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1383 
1384    Not collective
1385 
1386    Input Parameters:
1387 .  B - the matrix
1388 -  data - optional location of matrix data.  Set data=NULL for PETSc
1389    to control all matrix memory allocation.
1390 
1391    Notes:
1392    The dense format is fully compatible with standard Fortran 77
1393    storage by columns.
1394 
1395    The data input variable is intended primarily for Fortran programmers
1396    who wish to allocate their own matrix memory space.  Most users should
1397    set data=NULL.
1398 
1399    Level: intermediate
1400 
1401 .keywords: matrix,dense, parallel
1402 
1403 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1404 @*/
1405 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1406 {
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatCreateDense"
1416 /*@C
1417    MatCreateDense - Creates a parallel matrix in dense format.
1418 
1419    Collective on MPI_Comm
1420 
1421    Input Parameters:
1422 +  comm - MPI communicator
1423 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1424 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1425 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1426 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1427 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1428    to control all matrix memory allocation.
1429 
1430    Output Parameter:
1431 .  A - the matrix
1432 
1433    Notes:
1434    The dense format is fully compatible with standard Fortran 77
1435    storage by columns.
1436 
1437    The data input variable is intended primarily for Fortran programmers
1438    who wish to allocate their own matrix memory space.  Most users should
1439    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1440 
1441    The user MUST specify either the local or global matrix dimensions
1442    (possibly both).
1443 
1444    Level: intermediate
1445 
1446 .keywords: matrix,dense, parallel
1447 
1448 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1449 @*/
1450 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1451 {
1452   PetscErrorCode ierr;
1453   PetscMPIInt    size;
1454 
1455   PetscFunctionBegin;
1456   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1457   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1459   if (size > 1) {
1460     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1461     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1462     if (data) {  /* user provided data array, so no need to assemble */
1463       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1464       (*A)->assembled = PETSC_TRUE;
1465     }
1466   } else {
1467     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1468     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
/*
   MatDuplicate_MPIDense - Creates a new matrix with the same layout as A,
   optionally copying its values (per cpvalues).

   The scalar fields of the Mat_MPIDense context are copied by hand, the
   row/column layouts are shared by reference (not copied), the
   matrix-vector-multiply scatter is rebuilt, and the inner sequential
   matrix is duplicated last.
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  /* take over A's (possibly customized) operation table */
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  /* layouts are reference-counted, not deep-copied */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
1509 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/*
   MatLoad_MPIDense_DenseInFile - Loads an M x N matrix stored in the binary
   file in dense (row-wise) format.

   Process 0 reads the file: first its own row block, then each other
   process's block in rank order, shipping each with MPIULong_Send().  Every
   process transposes the received row-wise values into its column-major
   local array.  The scratch buffer on rank 0 is sized mMax*N so it can hold
   the largest row block of any process.
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  const PetscInt *rowners;
  PetscInt       i,m,n,nz,j,mMax;
  PetscScalar    *array,*vals,*vals_ptr;
  Mat_MPIDense   *a = (Mat_MPIDense*)newmat->data;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of rows and columns */
  m = (newmat->rmap->n < 0) ? PETSC_DECIDE : newmat->rmap->n;
  n = (newmat->cmap->n < 0) ? PETSC_DECIDE : newmat->cmap->n;

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  /* preallocate unless the user already supplied their own storage */
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
  ierr = MatGetLocalSize(newmat,&m,NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(newmat,&rowners);CHKERRQ(ierr);
  /* rank 0 needs a buffer large enough for any process's row block */
  ierr = MPI_Reduce(&m,&mMax,1,MPIU_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(mMax*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1578 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
/*
   MatLoad_MPIDense - Loads an MPIDENSE matrix from a PETSc binary viewer.

   The file begins with a four-PetscInt header: classid, global rows M,
   global columns N, and nz.  When nz == MATRIX_BINARY_FORMAT_DENSE the
   values are stored densely and the work is delegated to
   MatLoad_MPIDense_DenseInFile(); otherwise the file holds a sparse
   representation (per-row lengths, column indices, values) that rank 0
   reads in full and distributes, after which every process inserts its
   own rows with MatSetValues().
*/
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  Mat_MPIDense   *a;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,n,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* only rank 0 touches the file; the classid (header[0]) is validated
     here and only M, N, nz are broadcast below */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (newmat->rmap->N < 0) newmat->rmap->N = M;
  if (newmat->cmap->N < 0) newmat->cmap->N = N;

  if (newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,newmat->rmap->N);
  if (newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,newmat->cmap->N);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows: split nearly evenly (the first M%size
     ranks get one extra row) unless the caller already fixed the local size */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  if (newmat->cmap->n < 0) {
    n = PETSC_DECIDE;
  } else {
    ierr = PetscMPIIntCast(newmat->cmap->n,&n);CHKERRQ(ierr);
  }

  /* rowners is PetscMPIInt because it doubles as the displacement array for
     MPI_Scatterv below.  NOTE(review): size+2 entries are allocated but only
     indices 0..size appear to be used - confirm the extra slot is intentional */
  ierr       = PetscMalloc1(size+2,&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix sum: turn per-process row counts into ownership offsets */
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(rend-rstart,&ourlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* send buffer arguments are ignored on non-root ranks */
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it.
       NOTE(review): maxnz is PetscMPIInt while procsnz[] is PetscInt - a
       per-process nonzero count above the int range would be truncated here;
       confirm acceptable for the intended file sizes */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message: sum of my row lengths */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    /* nz+1 so the malloc succeeds even when this rank owns no nonzeros */
    ierr = PetscMalloc1(nz+1,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity check: the received count must match what the row lengths predict */
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  ierr = MatSetSizes(newmat,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_MPIDense*)newmat->data;
  /* allocate local storage unless the user supplied their own array (user_alloc) */
  if (!a->A || !((Mat_SeqDense*)(a->A->data))->user_alloc) {
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix one global row (jj) at a time, walking the
       per-row slices of mycols/vals in step with ourlens */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' values and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix one global row (jj) at a time */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1761 
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatEqual_MPIDense"
1764 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1765 {
1766   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1767   Mat            a,b;
1768   PetscBool      flg;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   a    = matA->A;
1773   b    = matB->A;
1774   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1775   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779