xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 3923b477fd0dced8a2d147b4fb4519fe3af97d3f)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14               matrix that stores this process's portion of the operator. For a sequential matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) {
34     *B = mat->A;
35   } else {
36     *B = A;
37   }
38   PetscFunctionReturn(0);
39 }
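
/*
   Usage sketch for the routine above (not part of the library source; the
   helper name ExampleGetLocalMatrix and the sizes are illustrative). The
   returned matrix is a borrowed reference holding this process's rows, so
   the caller must not destroy it.
*/
#if 0
static PetscErrorCode ExampleGetLocalMatrix(MPI_Comm comm)
{
  Mat            A,Aloc;
  PetscInt       m,N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr); /* local rows x global columns */
  ierr = MatGetSize(Aloc,&m,&N);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);                  /* Aloc is not destroyed separately */
  PetscFunctionReturn(0);
}
#endif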
40 
41 #undef __FUNCT__
42 #define __FUNCT__ "MatGetRow_MPIDense"
43 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
48 
49   PetscFunctionBegin;
50   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51   lrow = row - rstart;
52   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "MatRestoreRow_MPIDense"
58 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
59 {
60   PetscErrorCode ierr;
61 
62   PetscFunctionBegin;
63   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
64   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
65   PetscFunctionReturn(0);
66 }
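
/*
   Sketch pairing the two routines above (ExampleRowSweep is an illustrative
   helper, not library code): only locally owned rows may be requested, and
   every MatGetRow() must be matched by a MatRestoreRow(), which frees the
   index/value copies allocated for the caller.
*/
#if 0
static PetscErrorCode ExampleRowSweep(Mat A)   /* A is a MATMPIDENSE matrix */
{
  PetscInt          row,rstart,rend,nz;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
    /* ... use the nz column indices and values of this row ... */
    ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif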
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
71 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
72 {
73   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
74   PetscErrorCode ierr;
75   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
76   PetscScalar    *array;
77   MPI_Comm       comm;
78   Mat            B;
79 
80   PetscFunctionBegin;
81   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
82 
83   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
84   if (!B) {
85     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
86     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
87     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
88     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
89     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
90     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
91     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
92     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
93     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
94     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
95     *a = B;
96     ierr = MatDestroy(&B);CHKERRQ(ierr);
97   } else {
98     *a = B;
99   }
100   PetscFunctionReturn(0);
101 }
102 EXTERN_C_END
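
/*
   Usage sketch for the routine above (ExampleDiagonalBlock is illustrative):
   the diagonal block is built once, cached on A under the name
   "DiagonalBlock", and returned as a borrowed reference that the caller must
   not destroy. A must be square, per the check above.
*/
#if 0
static PetscErrorCode ExampleDiagonalBlock(Mat A)   /* A is a square MATMPIDENSE matrix */
{
  Mat            a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetDiagonalBlock(A,&a);CHKERRQ(ierr);   /* m x m block owned by this process */
  ierr = MatView(a,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif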
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatSetValues_MPIDense"
106 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
107 {
108   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
109   PetscErrorCode ierr;
110   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
111   PetscBool      roworiented = A->roworiented;
112 
113   PetscFunctionBegin;
114   if (v) PetscValidScalarPointer(v,6);
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         mat->assembled = PETSC_FALSE;
132         if (roworiented) {
133           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
134         } else {
135           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
136         }
137       }
138     }
139   }
140   PetscFunctionReturn(0);
141 }
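
/*
   Sketch of how the stashing path above is exercised (ExampleOffProcessSet
   and the indices are illustrative): entries destined for rows owned by
   another process are cached in mat->stash and only communicated during
   MatAssemblyBegin/End().
*/
#if 0
static PetscErrorCode ExampleOffProcessSet(Mat A)   /* A is MATMPIDENSE, at least 1x1 */
{
  PetscInt       row = 0,col = 0;   /* owned by rank 0; off-process everywhere else */
  PetscScalar    v   = 1.0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* sends stashed entries */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    /* receives and inserts them */
  PetscFunctionReturn(0);
}
#endif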
142 
143 #undef __FUNCT__
144 #define __FUNCT__ "MatGetValues_MPIDense"
145 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
146 {
147   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
148   PetscErrorCode ierr;
149   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
150 
151   PetscFunctionBegin;
152   for (i=0; i<m; i++) {
153     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
154     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
155     if (idxm[i] >= rstart && idxm[i] < rend) {
156       row = idxm[i] - rstart;
157       for (j=0; j<n; j++) {
158         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
159         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
161       }
162     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatDenseGetArray_MPIDense"
169 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
170 {
171   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
172   PetscErrorCode ierr;
173 
174   PetscFunctionBegin;
175   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
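
/*
   Sketch of direct array access through the routine above (ExampleArrayAccess
   is illustrative): the pointer is the local block in column-major (Fortran)
   order; for this implementation the leading dimension equals the number of
   local rows. Every get must be matched by MatDenseRestoreArray().
*/
#if 0
static PetscErrorCode ExampleArrayAccess(Mat A)   /* A is a MATMPIDENSE matrix */
{
  PetscScalar    *array;
  PetscInt       m,N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
  ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
  /* entry (local row i, global column j) is array[i + j*m] */
  if (m && N) array[0] = 42.0;
  ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif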
178 
179 #undef __FUNCT__
180 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
181 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
182 {
183   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
184   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
185   PetscErrorCode ierr;
186   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
187   const PetscInt *irow,*icol;
188   PetscScalar    *av,*bv,*v = lmat->v;
189   Mat            newmat;
190   IS             iscol_local;
191 
192   PetscFunctionBegin;
193   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
199 
200   /* No parallel redistribution is currently supported! We should really check each index set
201      to confirm that it is valid. Currently only submatrices with the same row partitioning as
202      the original matrix are supported! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<Ncols; i++) {
225     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
238   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
239   *B = newmat;
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
245 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
246 {
247   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
248   PetscErrorCode ierr;
249 
250   PetscFunctionBegin;
251   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
257 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
260   MPI_Comm       comm = ((PetscObject)mat)->comm;
261   PetscErrorCode ierr;
262   PetscInt       nstash,reallocs;
263   InsertMode     addv;
264 
265   PetscFunctionBegin;
266   /* make sure all processors are either in INSERTMODE or ADDMODE */
267   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
268   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
269   mat->insertmode = addv; /* in case this processor had no cache */
270 
271   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
272   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
273   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
279 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
280 {
281   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
282   PetscErrorCode  ierr;
283   PetscInt        i,*row,*col,flg,j,rstart,ncols;
284   PetscMPIInt     n;
285   PetscScalar     *val;
286   InsertMode      addv=mat->insertmode;
287 
288   PetscFunctionBegin;
289   /*  wait on receives */
290   while (1) {
291     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
292     if (!flg) break;
293 
294     for (i=0; i<n;) {
295       /* Now identify the consecutive vals belonging to the same row */
296       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
297       if (j < n) ncols = j-i;
298       else       ncols = n-i;
299       /* Now assemble all these values with a single function call */
300       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
301       i = j;
302     }
303   }
304   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
305 
306   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
307   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
308 
309   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
310     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatZeroEntries_MPIDense"
317 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
318 {
319   PetscErrorCode ierr;
320   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
321 
322   PetscFunctionBegin;
323   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 /* The code does not set the diagonal entries correctly unless the
328    matrix is square and the column and row ownerships are identical.
329    This is a BUG. The only way to fix it seems to be to access
330    mdn->A and mdn->B directly and not through the MatZeroRows()
331    routine.
332 */
333 #undef __FUNCT__
334 #define __FUNCT__ "MatZeroRows_MPIDense"
335 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
336 {
337   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
338   PetscErrorCode    ierr;
339   PetscInt          i,*owners = A->rmap->range;
340   PetscInt          *nprocs,j,idx,nsends;
341   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
342   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
343   PetscInt          *lens,*lrows,*values;
344   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
345   MPI_Comm          comm = ((PetscObject)A)->comm;
346   MPI_Request       *send_waits,*recv_waits;
347   MPI_Status        recv_status,*send_status;
348   PetscBool         found;
349   const PetscScalar *xx;
350   PetscScalar       *bb;
351 
352   PetscFunctionBegin;
353   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Only handles square matrices");
354   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
355   /*  first count number of contributors to each processor */
356   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
357   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
358   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* owner[i] = rank owning rows[i]; N+1 avoids a zero-length allocation */
359   for (i=0; i<N; i++) {
360     idx = rows[i];
361     found = PETSC_FALSE;
362     for (j=0; j<size; j++) {
363       if (idx >= owners[j] && idx < owners[j+1]) {
364         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
365       }
366     }
367     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
368   }
369   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
370 
371   /* inform other processors of the number of messages and the max length */
372   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
373 
374   /* post receives:   */
375   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
376   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
377   for (i=0; i<nrecvs; i++) {
378     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
379   }
380 
381   /* do sends:
382       1) starts[i] gives the starting index in svalues for stuff going to
383          the ith processor
384   */
385   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
386   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
387   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
388   starts[0]  = 0;
389   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
390   for (i=0; i<N; i++) {
391     svalues[starts[owner[i]]++] = rows[i];
392   }
393 
394   starts[0] = 0;
395   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
396   count = 0;
397   for (i=0; i<size; i++) {
398     if (nprocs[2*i+1]) {
399       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
400     }
401   }
402   ierr = PetscFree(starts);CHKERRQ(ierr);
403 
404   base = owners[rank];
405 
406   /*  wait on receives */
407   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
408   count  = nrecvs;
409   slen   = 0;
410   while (count) {
411     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
412     /* unpack receives into our local space */
413     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
414     source[imdex]  = recv_status.MPI_SOURCE;
415     lens[imdex]    = n;
416     slen += n;
417     count--;
418   }
419   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
420 
421   /* move the data into the send scatter */
422   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
423   count = 0;
424   for (i=0; i<nrecvs; i++) {
425     values = rvalues + i*nmax;
426     for (j=0; j<lens[i]; j++) {
427       lrows[count++] = values[j] - base;
428     }
429   }
430   ierr = PetscFree(rvalues);CHKERRQ(ierr);
431   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
432   ierr = PetscFree(owner);CHKERRQ(ierr);
433   ierr = PetscFree(nprocs);CHKERRQ(ierr);
434 
435   /* fix right hand side if needed */
436   if (x && b) {
437     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
438     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
439     for (i=0; i<slen; i++) {
440       bb[lrows[i]] = diag*xx[lrows[i]];
441     }
442     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
443     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
444   }
445 
446   /* actually zap the local rows */
447   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
448   if (diag != 0.0) {
449     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
450     PetscInt      m = ll->lda, i;
451 
452     for (i=0; i<slen; i++) {
453       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
454     }
455   }
456   ierr = PetscFree(lrows);CHKERRQ(ierr);
457 
458   /* wait on sends */
459   if (nsends) {
460     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
461     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
462     ierr = PetscFree(send_status);CHKERRQ(ierr);
463   }
464   ierr = PetscFree(send_waits);CHKERRQ(ierr);
465   ierr = PetscFree(svalues);CHKERRQ(ierr);
466 
467   PetscFunctionReturn(0);
468 }
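
/*
   Sketch of the public entry point implemented above (ExampleZeroRows and
   the row choice are illustrative): zero possibly off-process rows, place
   diag on the diagonal, and adjust the right-hand side b so that
   b = diag*x in the zeroed rows.
*/
#if 0
static PetscErrorCode ExampleZeroRows(Mat A,Vec x,Vec b)  /* A is a square MATMPIDENSE matrix */
{
  PetscInt       rows[1],rstart;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr    = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  rows[0] = rstart;                             /* first locally owned row */
  ierr    = MatZeroRows(A,1,rows,1.0,x,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif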
469 
470 #undef __FUNCT__
471 #define __FUNCT__ "MatMult_MPIDense"
472 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
473 {
474   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatMultAdd_MPIDense"
486 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
487 {
488   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
489   PetscErrorCode ierr;
490 
491   PetscFunctionBegin;
492   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
494   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
495   PetscFunctionReturn(0);
496 }
497 
498 #undef __FUNCT__
499 #define __FUNCT__ "MatMultTranspose_MPIDense"
500 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
501 {
502   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
503   PetscErrorCode ierr;
504   PetscScalar    zero = 0.0;
505 
506   PetscFunctionBegin;
507   ierr = VecSet(yy,zero);CHKERRQ(ierr);
508   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
509   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
516 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
517 {
518   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
519   PetscErrorCode ierr;
520 
521   PetscFunctionBegin;
522   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
523   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
524   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
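
/*
   Sketch exercising the multiply kernels above (ExampleMultiply is
   illustrative). The input and output vectors must match the matrix column
   and row layouts; MatGetVecs(), as available in this PETSc version,
   produces such vectors.
*/
#if 0
static PetscErrorCode ExampleMultiply(Mat A)   /* A is an assembled MATMPIDENSE matrix */
{
  Vec            x,y;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);        /* x: column layout, y: row layout */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);             /* y = A*x   */
  ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);    /* x = A^T*y */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif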
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatGetDiagonal_MPIDense"
531 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
532 {
533   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
534   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
535   PetscErrorCode ierr;
536   PetscInt       len,i,n,m = A->rmap->n,radd;
537   PetscScalar    *x,zero = 0.0;
538 
539   PetscFunctionBegin;
540   ierr = VecSet(v,zero);CHKERRQ(ierr);
541   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
542   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
543   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
544   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
545   radd = A->rmap->rstart*m;
546   for (i=0; i<len; i++) {
547     x[i] = aloc->v[radd + i*m + i];
548   }
549   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatDestroy_MPIDense"
555 PetscErrorCode MatDestroy_MPIDense(Mat mat)
556 {
557   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
558   PetscErrorCode ierr;
559 
560   PetscFunctionBegin;
561 
562 #if defined(PETSC_USE_LOG)
563   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
564 #endif
565   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
566   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
567   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
568   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
569 
570   ierr = PetscFree(mat->data);CHKERRQ(ierr);
571   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
573   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatView_MPIDense_Binary"
582 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
583 {
584   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
585   PetscErrorCode    ierr;
586   PetscViewerFormat format;
587   int               fd;
588   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
589   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
590   PetscScalar       *work,*v,*vv;
591   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
592 
593   PetscFunctionBegin;
594   if (mdn->size == 1) {
595     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
596   } else {
597     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
598     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
599     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
600 
601     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
602     if (format == PETSC_VIEWER_NATIVE) {
603 
604       if (!rank) {
605         /* store the matrix as a dense matrix */
606         header[0] = MAT_FILE_CLASSID;
607         header[1] = mat->rmap->N;
608         header[2] = N;
609         header[3] = MATRIX_BINARY_FORMAT_DENSE;
610         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
611 
612         /* get largest work array needed for transposing array */
613         mmax = mat->rmap->n;
614         for (i=1; i<size; i++) {
615           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
616         }
617         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
618 
619         /* write out local array, by rows */
620         m    = mat->rmap->n;
621         v    = a->v;
622         for (j=0; j<N; j++) {
623           for (i=0; i<m; i++) {
624             work[j + i*N] = *v++;
625           }
626         }
627         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
628         /* get largest work array to receive messages from other processes, excludes process zero */
629         mmax = 0;
630         for (i=1; i<size; i++) {
631           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
632         }
633         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
634         for (k = 1; k < size; k++) {
635           v    = vv;
636           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
637           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
638 
639           for (j = 0; j < N; j++) {
640             for (i = 0; i < m; i++) {
641               work[j + i*N] = *v++;
642             }
643           }
644           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
645         }
646         ierr = PetscFree(work);CHKERRQ(ierr);
647         ierr = PetscFree(vv);CHKERRQ(ierr);
648       } else {
649         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
650       }
651     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
652   }
653   PetscFunctionReturn(0);
654 }
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
658 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
659 {
660   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
661   PetscErrorCode    ierr;
662   PetscMPIInt       size = mdn->size,rank = mdn->rank;
663   PetscViewerType   vtype;
664   PetscBool         iascii,isdraw;
665   PetscViewer       sviewer;
666   PetscViewerFormat format;
667 
668   PetscFunctionBegin;
669   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
670   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
671   if (iascii) {
672     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
673     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
674     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
675       MatInfo info;
676       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
677       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
678       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
679       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
680       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
681       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
682       PetscFunctionReturn(0);
683     } else if (format == PETSC_VIEWER_ASCII_INFO) {
684       PetscFunctionReturn(0);
685     }
686   } else if (isdraw) {
687     PetscDraw  draw;
688     PetscBool  isnull;
689 
690     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
691     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
692     if (isnull) PetscFunctionReturn(0);
693   }
694 
695   if (size == 1) {
696     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
697   } else {
698     /* assemble the entire matrix onto first processor. */
699     Mat         A;
700     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
701     PetscInt    *cols;
702     PetscScalar *vals;
703 
704     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
705     if (!rank) {
706       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
707     } else {
708       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
709     }
710     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
711     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
712     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
713     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
714 
715     /* Copy the matrix ... This isn't the most efficient means,
716        but it's quick for now */
717     A->insertmode = INSERT_VALUES;
718     row = mat->rmap->rstart; m = mdn->A->rmap->n;
719     for (i=0; i<m; i++) {
720       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
721       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
722       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
723       row++;
724     }
725 
726     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
727     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
728     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
729     if (!rank) {
730       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
731       /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
732       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
733       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
734     }
735     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
736     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
737     ierr = MatDestroy(&A);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPIDense"
744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745 {
746   PetscErrorCode ierr;
747   PetscBool      iascii,isbinary,isdraw,issocket;
748 
749   PetscFunctionBegin;
750 
751   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetInfo_MPIDense"
766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767 {
768   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769   Mat            mdn = mat->A;
770   PetscErrorCode ierr;
771   PetscReal      isend[5],irecv[5];
772 
773   PetscFunctionBegin;
774   info->block_size     = 1.0;
775   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
776   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
777   isend[3] = info->memory;  isend[4] = info->mallocs;
778   if (flag == MAT_LOCAL) {
779     info->nz_used      = isend[0];
780     info->nz_allocated = isend[1];
781     info->nz_unneeded  = isend[2];
782     info->memory       = isend[3];
783     info->mallocs      = isend[4];
784   } else if (flag == MAT_GLOBAL_MAX) {
785     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
786     info->nz_used      = irecv[0];
787     info->nz_allocated = irecv[1];
788     info->nz_unneeded  = irecv[2];
789     info->memory       = irecv[3];
790     info->mallocs      = irecv[4];
791   } else if (flag == MAT_GLOBAL_SUM) {
792     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
793     info->nz_used      = irecv[0];
794     info->nz_allocated = irecv[1];
795     info->nz_unneeded  = irecv[2];
796     info->memory       = irecv[3];
797     info->mallocs      = irecv[4];
798   }
799   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
800   info->fill_ratio_needed = 0;
801   info->factor_mallocs    = 0;
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatSetOption_MPIDense"
807 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
808 {
809   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
810   PetscErrorCode ierr;
811 
812   PetscFunctionBegin;
813   switch (op) {
814   case MAT_NEW_NONZERO_LOCATIONS:
815   case MAT_NEW_NONZERO_LOCATION_ERR:
816   case MAT_NEW_NONZERO_ALLOCATION_ERR:
817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
818     break;
819   case MAT_ROW_ORIENTED:
820     a->roworiented = flg;
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_NEW_DIAGONALS:
824   case MAT_KEEP_NONZERO_PATTERN:
825   case MAT_USE_HASH_TABLE:
826     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
827     break;
828   case MAT_IGNORE_OFF_PROC_ENTRIES:
829     a->donotstash = flg;
830     break;
831   case MAT_SYMMETRIC:
832   case MAT_STRUCTURALLY_SYMMETRIC:
833   case MAT_HERMITIAN:
834   case MAT_SYMMETRY_ETERNAL:
835   case MAT_IGNORE_LOWER_TRIANGULAR:
836     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
837     break;
838   default:
839     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 
845 #undef __FUNCT__
846 #define __FUNCT__ "MatDiagonalScale_MPIDense"
847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
848 {
849   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
850   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
851   PetscScalar    *l,*r,x,*v;
852   PetscErrorCode ierr;
853   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
854 
855   PetscFunctionBegin;
856   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
857   if (ll) {
858     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
859     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
860     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
861     for (i=0; i<m; i++) {
862       x = l[i];
863       v = mat->v + i;
864       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
865     }
866     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
867     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
868   }
869   if (rr) {
870     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
871     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
872     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
875     for (i=0; i<n; i++) {
876       x = r[i];
877       v = mat->v + i*m;
878       for (j=0; j<m; j++) { (*v++) *= x;}
879     }
880     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
881     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
882   }
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatNorm_MPIDense"
888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
889 {
890   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
891   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
892   PetscErrorCode ierr;
893   PetscInt       i,j;
894   PetscReal      sum = 0.0;
895   PetscScalar    *v = mat->v;
896 
897   PetscFunctionBegin;
898   if (mdn->size == 1) {
899     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
900   } else {
901     if (type == NORM_FROBENIUS) {
902       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
903 #if defined(PETSC_USE_COMPLEX)
904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
905 #else
906         sum += (*v)*(*v); v++;
907 #endif
908       }
909       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
910       *nrm = PetscSqrtReal(*nrm);
911       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
912     } else if (type == NORM_1) {
913       PetscReal *tmp,*tmp2;
914       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
915       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
916       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
917       *nrm = 0.0;
918       v = mat->v;
919       for (j=0; j<mdn->A->cmap->n; j++) {
920         for (i=0; i<mdn->A->rmap->n; i++) {
921           tmp[j] += PetscAbsScalar(*v);  v++;
922         }
923       }
924       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
925       for (j=0; j<A->cmap->N; j++) {
926         if (tmp2[j] > *nrm) *nrm = tmp2[j];
927       }
928       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
929       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
930     } else if (type == NORM_INFINITY) { /* max row norm */
931       PetscReal ntemp;
932       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
933       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
934     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
935   }
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatTranspose_MPIDense"
941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
942 {
943   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
944   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
945   Mat            B;
946   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
947   PetscErrorCode ierr;
948   PetscInt       j,i;
949   PetscScalar    *v;
950 
951   PetscFunctionBegin;
952   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
953   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
954     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
955     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
956     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
957     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
958   } else {
959     B = *matout;
960   }
961 
962   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
963   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
964   for (i=0; i<m; i++) rwork[i] = rstart + i;
965   for (j=0; j<n; j++) {
966     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
967     v   += m;
968   }
969   ierr = PetscFree(rwork);CHKERRQ(ierr);
970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
973     *matout = B;
974   } else {
975     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 
981 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
982 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatSetUp_MPIDense"
986 PetscErrorCode MatSetUp_MPIDense(Mat A)
987 {
988   PetscErrorCode ierr;
989 
990   PetscFunctionBegin;
991   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 #undef __FUNCT__
996 #define __FUNCT__ "MatAXPY_MPIDense"
997 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
998 {
999   PetscErrorCode ierr;
1000   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1001 
1002   PetscFunctionBegin;
1003   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatConjugate_MPIDense"
1009 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1010 {
1011   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1012   PetscErrorCode ierr;
1013 
1014   PetscFunctionBegin;
1015   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatRealPart_MPIDense"
1021 PetscErrorCode MatRealPart_MPIDense(Mat A)
1022 {
1023   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1024   PetscErrorCode ierr;
1025 
1026   PetscFunctionBegin;
1027   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1033 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1034 {
1035   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1036   PetscErrorCode ierr;
1037 
1038   PetscFunctionBegin;
1039   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1046 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1047 {
1048   PetscErrorCode ierr;
1049   PetscInt       i,n;
1050   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1051   PetscReal      *work;
1052 
1053   PetscFunctionBegin;
1054   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
1055   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
1056   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1057   if (type == NORM_2) {
1058     for (i=0; i<n; i++) work[i] *= work[i];
1059   }
1060   if (type == NORM_INFINITY) {
1061     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
1062   } else {
1063     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
1064   }
1065   ierr = PetscFree(work);CHKERRQ(ierr);
1066   if (type == NORM_2) {
1067     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1068   }
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatSetRandom_MPIDense"
1074 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1075 {
1076   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1077   PetscErrorCode ierr;
1078   PetscScalar    *a;
1079   PetscInt       m,n,i;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1083   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1084   for (i=0; i<m*n; i++) {
1085     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1086   }
1087   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 /* -------------------------------------------------------------------*/
1092 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1093        MatGetRow_MPIDense,
1094        MatRestoreRow_MPIDense,
1095        MatMult_MPIDense,
1096 /* 4*/ MatMultAdd_MPIDense,
1097        MatMultTranspose_MPIDense,
1098        MatMultTransposeAdd_MPIDense,
1099        0,
1100        0,
1101        0,
1102 /*10*/ 0,
1103        0,
1104        0,
1105        0,
1106        MatTranspose_MPIDense,
1107 /*15*/ MatGetInfo_MPIDense,
1108        MatEqual_MPIDense,
1109        MatGetDiagonal_MPIDense,
1110        MatDiagonalScale_MPIDense,
1111        MatNorm_MPIDense,
1112 /*20*/ MatAssemblyBegin_MPIDense,
1113        MatAssemblyEnd_MPIDense,
1114        MatSetOption_MPIDense,
1115        MatZeroEntries_MPIDense,
1116 /*24*/ MatZeroRows_MPIDense,
1117        0,
1118        0,
1119        0,
1120        0,
1121 /*29*/ MatSetUp_MPIDense,
1122        0,
1123        0,
1124        0,
1125        0,
1126 /*34*/ MatDuplicate_MPIDense,
1127        0,
1128        0,
1129        0,
1130        0,
1131 /*39*/ MatAXPY_MPIDense,
1132        MatGetSubMatrices_MPIDense,
1133        0,
1134        MatGetValues_MPIDense,
1135        0,
1136 /*44*/ 0,
1137        MatScale_MPIDense,
1138        0,
1139        0,
1140        0,
1141 /*49*/ MatSetRandom_MPIDense,
1142        0,
1143        0,
1144        0,
1145        0,
1146 /*54*/ 0,
1147        0,
1148        0,
1149        0,
1150        0,
1151 /*59*/ MatGetSubMatrix_MPIDense,
1152        MatDestroy_MPIDense,
1153        MatView_MPIDense,
1154        0,
1155        0,
1156 /*64*/ 0,
1157        0,
1158        0,
1159        0,
1160        0,
1161 /*69*/ 0,
1162        0,
1163        0,
1164        0,
1165        0,
1166 /*74*/ 0,
1167        0,
1168        0,
1169        0,
1170        0,
1171 /*79*/ 0,
1172        0,
1173        0,
1174        0,
1175 /*83*/ MatLoad_MPIDense,
1176        0,
1177        0,
1178        0,
1179        0,
1180        0,
1181 /*89*/
1182        0,
1183        0,
1184        0,
1185        0,
1186        0,
1187 /*94*/ 0,
1188        0,
1189        0,
1190        0,
1191        0,
1192 /*99*/ 0,
1193        0,
1194        0,
1195        MatConjugate_MPIDense,
1196        0,
1197 /*104*/0,
1198        MatRealPart_MPIDense,
1199        MatImaginaryPart_MPIDense,
1200        0,
1201        0,
1202 /*109*/0,
1203        0,
1204        0,
1205        0,
1206        0,
1207 /*114*/0,
1208        0,
1209        0,
1210        0,
1211        0,
1212 /*119*/0,
1213        0,
1214        0,
1215        0,
1216        0,
1217 /*124*/0,
1218        MatGetColumnNorms_MPIDense
1219 };
1220 
1221 EXTERN_C_BEGIN
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1224 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1225 {
1226   Mat_MPIDense   *a;
1227   PetscErrorCode ierr;
1228 
1229   PetscFunctionBegin;
1230   mat->preallocated = PETSC_TRUE;
1231   /* Note: for now, when data is provided, we assume the user has correctly
1232      allocated the local dense storage space.  We should add error checking. */
1233 
1234   a    = (Mat_MPIDense*)mat->data;
1235   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1236   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1237   a->nvec = mat->cmap->n;
1238 
1239   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1240   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1241   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1242   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1243   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 EXTERN_C_END
1247 
1248 EXTERN_C_BEGIN
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatCreate_MPIDense"
1251 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1252 {
1253   Mat_MPIDense   *a;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1258   mat->data         = (void*)a;
1259   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1260 
1261   mat->insertmode = NOT_SET_VALUES;
1262   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1263   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1264 
1265   /* build cache for off array entries formed */
1266   a->donotstash = PETSC_FALSE;
1267   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1268 
1269   /* stuff used for matrix vector multiply */
1270   a->lvec        = 0;
1271   a->Mvctx       = 0;
1272   a->roworiented = PETSC_TRUE;
1273 
1274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseGetArray_C","MatDenseGetArray_MPIDense",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatDenseRestoreArray_C","MatDenseRestoreArray_MPIDense",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1276 
1277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1278                                      "MatGetDiagonalBlock_MPIDense",
1279                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1281                                      "MatMPIDenseSetPreallocation_MPIDense",
1282                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1284                                      "MatMatMult_MPIAIJ_MPIDense",
1285                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1287                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1288                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1290                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1291                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1293 
1294   PetscFunctionReturn(0);
1295 }
1296 EXTERN_C_END
1297 
1298 /*MC
1299    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1300 
1301    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1302    and MATMPIDENSE otherwise.
1303 
1304    Options Database Keys:
1305 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1306 
1307   Level: beginner
1308 
1309 
1310 .seealso: MatCreateDense(),MATSEQDENSE,MATMPIDENSE
1311 M*/
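
/*
   Sketch of selecting the type from the options database, as the manual page
   above describes (ExampleDenseFromOptions and the sizes are illustrative):
   with -mat_type dense the matrix becomes MATSEQDENSE on one process and
   MATMPIDENSE on several.
*/
#if 0
static PetscErrorCode ExampleDenseFromOptions(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,50,50);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);   /* default; -mat_type can override it */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);
  ierr = MatSetUp(*A);CHKERRQ(ierr);              /* triggers the preallocation above */
  PetscFunctionReturn(0);
}
#endif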
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1315 /*@C
1316    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1317 
1318    Not collective
1319 
1320    Input Parameters:
1321 +  A - the matrix
1322 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1323    to control all matrix memory allocation.
1324 
1325    Notes:
1326    The dense format is fully compatible with standard Fortran 77
1327    storage by columns.
1328 
1329    The data input variable is intended primarily for Fortran programmers
1330    who wish to allocate their own matrix memory space.  Most users should
1331    set data=PETSC_NULL.
1332 
1333    Level: intermediate
1334 
1335 .keywords: matrix,dense, parallel
1336 
1337 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1338 @*/
1339 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1340 {
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
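
/*
   Sketch of the Fortran-style usage described above (ExampleUserStorage and
   the sizes are illustrative): the caller supplies the column-major local
   storage of size (local rows) x (global columns), which PETSc then uses in
   place; the caller keeps ownership and frees it after MatDestroy().
*/
#if 0
static PetscErrorCode ExampleUserStorage(MPI_Comm comm,PetscInt m,PetscInt N,Mat *A)
{
  PetscScalar    *data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr); /* caller owns this */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif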
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatCreateDense"
1350 /*@C
1351    MatCreateDense - Creates a parallel matrix in dense format.
1352 
1353    Collective on MPI_Comm
1354 
1355    Input Parameters:
1356 +  comm - MPI communicator
1357 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1358 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1359 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1360 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1361 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1362    to control all matrix memory allocation.
1363 
1364    Output Parameter:
1365 .  A - the matrix
1366 
1367    Notes:
1368    The dense format is fully compatible with standard Fortran 77
1369    storage by columns.
1370 
1371    The data input variable is intended primarily for Fortran programmers
1372    who wish to allocate their own matrix memory space.  Most users should
1373    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1374 
1375    The user MUST specify either the local or global matrix dimensions
1376    (possibly both).
1377 
1378    Level: intermediate
1379 
1380 .keywords: matrix,dense, parallel
1381 
1382 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1383 @*/
1384 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1385 {
1386   PetscErrorCode ierr;
1387   PetscMPIInt    size;
1388 
1389   PetscFunctionBegin;
1390   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1391   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1392   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1393   if (size > 1) {
1394     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1395     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1396   } else {
1397     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1398     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
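
/*
   Sketch of the sizing rules documented above (ExampleCreateCompatible and
   the vector argument are illustrative): either the local sizes (m,n) or
   the global sizes (M,N) must be given; here the local row count is taken
   from an existing vector so that MatMult() output matches its layout.
*/
#if 0
static PetscErrorCode ExampleCreateCompatible(Vec y,PetscInt N,Mat *A)
{
  PetscInt       m;
  MPI_Comm       comm;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(y,&m);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)y,&comm);CHKERRQ(ierr);
  ierr = MatCreateDense(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,PETSC_NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif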
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatDuplicate_MPIDense"
1405 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1406 {
1407   Mat            mat;
1408   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412   *newmat       = 0;
1413   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1414   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1415   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1416   a                 = (Mat_MPIDense*)mat->data;
1417   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1418 
1419   mat->factortype   = A->factortype;
1420   mat->assembled    = PETSC_TRUE;
1421   mat->preallocated = PETSC_TRUE;
1422 
1423   a->size           = oldmat->size;
1424   a->rank           = oldmat->rank;
1425   mat->insertmode   = NOT_SET_VALUES;
1426   a->nvec           = oldmat->nvec;
1427   a->donotstash     = oldmat->donotstash;
1428 
1429   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1430   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1431 
1432   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1433   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1434   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1435 
1436   *newmat = mat;
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1442 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1443 {
1444   PetscErrorCode ierr;
1445   PetscMPIInt    rank,size;
1446   PetscInt       *rowners,i,m,nz,j;
1447   PetscScalar    *array,*vals,*vals_ptr;
1448 
1449   PetscFunctionBegin;
1450   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1451   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1452 
1453   /* determine ownership of all rows */
1454   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1455   else                     m = newmat->rmap->n;
1456   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1457   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1458   rowners[0] = 0;
1459   for (i=2; i<=size; i++) {
1460     rowners[i] += rowners[i-1];
1461   }
1462 
1463   if (!sizesset) {
1464     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1465   }
1466   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1467   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1468 
1469   if (!rank) {
1470     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1471 
1472     /* read in my part of the matrix numerical values  */
1473     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1474 
1475     /* insert into the matrix row by row: the file stores values row-major but the dense array is column-major, so we cannot read directly into array */
1476     vals_ptr = vals;
1477     for (i=0; i<m; i++) {
1478       for (j=0; j<N; j++) {
1479         array[i + j*m] = *vals_ptr++;
1480       }
1481     }
1482 
1483     /* read in the other processors' parts and ship them out */
1484     for (i=1; i<size; i++) {
1485       nz   = (rowners[i+1] - rowners[i])*N;
1486       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1487       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1488     }
1489   } else {
1490     /* receive numeric values */
1491     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1492 
1493     /* receive the message of values */
1494     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1495 
1496     /* insert into the matrix row by row: the file stores values row-major but the dense array is column-major, so we cannot read directly into array */
1497     vals_ptr = vals;
1498     for (i=0; i<m; i++) {
1499       for (j=0; j<N; j++) {
1500         array[i + j*m] = *vals_ptr++;
1501       }
1502     }
1503   }
1504   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1505   ierr = PetscFree(rowners);CHKERRQ(ierr);
1506   ierr = PetscFree(vals);CHKERRQ(ierr);
1507   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1508   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
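/*
   Note on the on-disk layout consumed above: after the standard PETSc binary
   header (MAT_FILE_CLASSID, M, N, nz = MATRIX_BINARY_FORMAT_DENSE), the file
   stores all M*N values row-major, so rank 0 reads each process's contiguous
   block of rows in turn and ships it out with MPIULong_Send().
*/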
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatLoad_MPIDense"
1514 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1515 {
1516   PetscScalar    *vals,*svals;
1517   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1518   MPI_Status     status;
1519   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1520   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1521   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1522   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1523   int            fd;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1528   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1529   if (!rank) {
1530     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1531     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1532     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1533   }
1534   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1535 
1536   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1537   M = header[1]; N = header[2]; nz = header[3];
1538 
1539   /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1540   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1541   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1542 
1543   /* If the global sizes are set, check that they are consistent with those given in the file */
1544   if (sizesset) {
1545     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1546   }
1547   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%d) and input matrix has (%d)",M,grows);
1548   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%d) and input matrix has (%d)",N,gcols);
1549 
1550   /*
1551        Handle case where matrix is stored on disk as a dense matrix
1552   */
1553   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1554     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1555     PetscFunctionReturn(0);
1556   }
1557 
1558   /* determine ownership of all rows */
1559   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
1560   else                     m = PetscMPIIntCast(newmat->rmap->n);
1561   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1562   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1563   rowners[0] = 0;
1564   for (i=2; i<=size; i++) {
1565     rowners[i] += rowners[i-1];
1566   }
1567   rstart = rowners[rank];
1568   rend   = rowners[rank+1];
1569 
1570   /* distribute row lengths to all processors */
1571   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1572   if (!rank) {
1573     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1574     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1575     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1576     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1577     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1578     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1579   } else {
1580     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1581   }
1582 
1583   if (!rank) {
1584     /* calculate the number of nonzeros on each processor */
1585     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1586     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1587     for (i=0; i<size; i++) {
1588       for (j=rowners[i]; j< rowners[i+1]; j++) {
1589         procsnz[i] += rowlengths[j];
1590       }
1591     }
1592     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1593 
1594     /* determine max buffer needed and allocate it */
1595     maxnz = 0;
1596     for (i=0; i<size; i++) {
1597       maxnz = PetscMax(maxnz,procsnz[i]);
1598     }
1599     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1600 
1601     /* read in my part of the matrix column indices  */
1602     nz = procsnz[0];
1603     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1604     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1605 
1606     /* read in everyone else's part and ship it off */
1607     for (i=1; i<size; i++) {
1608       nz   = procsnz[i];
1609       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1610       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1611     }
1612     ierr = PetscFree(cols);CHKERRQ(ierr);
1613   } else {
1614     /* determine buffer space needed for message */
1615     nz = 0;
1616     for (i=0; i<m; i++) {
1617       nz += ourlens[i];
1618     }
1619     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1620 
1621     /* receive the message of column indices */
1622     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1623     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1624     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1625   }
1626 
1627   /* loop over local rows, determining the number of off-diagonal entries */
1628   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1629   jj = 0;
1630   for (i=0; i<m; i++) {
1631     for (j=0; j<ourlens[i]; j++) {
1632       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1633       jj++;
1634     }
1635   }
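  /*
     Example of the counting above: with rstart = 0 and rend = 4, a local row
     with column indices {1,5,7} gets offlens[i] = 2, since columns 5 and 7
     fall outside the diagonal block [rstart,rend).
  */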
1636 
1637   /* create our matrix (ourlens[] is temporarily reduced to the diagonal-block lengths, then restored below) */
1638   for (i=0; i<m; i++) {
1639     ourlens[i] -= offlens[i];
1640   }
1641 
1642   if (!sizesset) {
1643     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1644   }
1645   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1646   for (i=0; i<m; i++) {
1647     ourlens[i] += offlens[i];
1648   }
1649 
1650   if (!rank) {
1651     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1652 
1653     /* read in my part of the matrix numerical values  */
1654     nz = procsnz[0];
1655     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1656 
1657     /* insert into matrix */
1658     jj      = rstart;
1659     smycols = mycols;
1660     svals   = vals;
1661     for (i=0; i<m; i++) {
1662       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1663       smycols += ourlens[i];
1664       svals   += ourlens[i];
1665       jj++;
1666     }
1667 
1668     /* read in the other processors' parts and ship them out */
1669     for (i=1; i<size; i++) {
1670       nz   = procsnz[i];
1671       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1672       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1673     }
1674     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1675   } else {
1676     /* receive numeric values */
1677     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1678 
1679     /* receive the message of values */
1680     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1681     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1682     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1683 
1684     /* insert into matrix */
1685     jj      = rstart;
1686     smycols = mycols;
1687     svals   = vals;
1688     for (i=0; i<m; i++) {
1689       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1690       smycols += ourlens[i];
1691       svals   += ourlens[i];
1692       jj++;
1693     }
1694   }
1695   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1696   ierr = PetscFree(vals);CHKERRQ(ierr);
1697   ierr = PetscFree(mycols);CHKERRQ(ierr);
1698   ierr = PetscFree(rowners);CHKERRQ(ierr);
1699 
1700   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
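/*
   A hedged usage sketch ("matrix.dat" is a placeholder file name):
   MatLoad_MPIDense() is normally reached through MatLoad() on a matrix that
   has been given the type MATMPIDENSE.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/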
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatEqual_MPIDense"
1707 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1708 {
1709   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1710   Mat            a,b;
1711   PetscBool      flg;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   a = matA->A;
1716   b = matB->A;
1717   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1718   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
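/*
   Usage sketch: MatEqual() is collective; each process compares its local
   block and the MPI_LAND reduction above gives every rank the same global
   answer.

     PetscBool eq;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
*/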
1721 
1722