
/*
   Basic functions for parallel dense matrices.
*/


#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For a sequential matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}
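
/*
   Illustrative usage sketch (not part of the PETSc sources): extract the
   per-process MATSEQDENSE block of an assembled MATMPIDENSE matrix A.  The
   returned matrix is a borrowed reference and must not be destroyed.

       Mat Aloc;
       ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
       ierr = MatView(Aloc,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
*/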

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
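
/*
   Illustrative usage sketch (not part of the PETSc sources): obtain the
   diagonal block owned by this process through the generic interface.
   The block is cached on the matrix (composed as "DiagonalBlock"), so
   repeated calls are cheap.

       Mat diag;
       ierr = MatGetDiagonalBlock(A,&diag);CHKERRQ(ierr);
*/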

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
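
/*
   Illustrative usage sketch (not part of the PETSc sources): values for
   off-process rows are stashed above and communicated during assembly, so
   the usual insert/assemble pattern applies unchanged; row 0 may live on
   any process.

       PetscInt    row     = 0;
       PetscInt    cols[2] = {0,1};
       PetscScalar vals[2] = {1.0,2.0};
       ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/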

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetArray_MPIDense"
PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;

  PetscFunctionBegin;
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrices with the same
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
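
/*
   Illustrative usage sketch (not part of the PETSc sources): extract a
   submatrix that keeps the caller's row partitioning, as required above.
   nc_local is a hypothetical placeholder for the number of submatrix
   columns this process should own; the column indices are gathered onto
   every process internally.

       IS  isrow,iscol;
       Mat S;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,nc_local,0,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/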

#undef __FUNCT__
#define __FUNCT__ "MatDenseRestoreArray_MPIDense"
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;
  InsertMode     addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
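
/*
   Illustrative usage sketch (not part of the PETSc sources): zero a set of
   global rows, put diag on the diagonal, and adjust an existing right-hand
   side b for a known-solution vector x (pass NULL for x and b to skip
   that step).

       PetscInt rows[1] = {0};
       ierr = MatZeroRows(A,1,rows,1.0,x,b);CHKERRQ(ierr);
*/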

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
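
/*
   Illustrative usage sketch (not part of the PETSc sources): y = A*x with
   conforming layouts.  x must match the column layout of A and y its row
   layout; the scatter mdn->Mvctx gathers the needed entries of x.

       PetscInt m,n;
       Vec      x,y;
       ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
       ierr = VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DETERMINE,&x);CHKERRQ(ierr);
       ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DETERMINE,&y);CHKERRQ(ierr);
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/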

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m = mat->rmap->n;
        v = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
        for (k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);

          for (j = 0; j < N; j++) {
            for (i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
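
/*
   Illustrative usage sketch (not part of the PETSc sources): scale rows by
   ll and columns by rr, i.e. A <- diag(ll) * A * diag(rr).  ll must share
   A's row layout and rr its column layout; either may be NULL.

       ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/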

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
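
/*
   Illustrative usage sketch (not part of the PETSc sources): the Frobenius
   norm sums |a_ij|^2 locally and reduces over the communicator; NORM_1 is
   the maximum column sum and NORM_INFINITY the maximum row sum.

       PetscReal nrm;
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/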

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
972 
973 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
974 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
975 
976 #undef __FUNCT__
977 #define __FUNCT__ "MatSetUp_MPIDense"
978 PetscErrorCode MatSetUp_MPIDense(Mat A)
979 {
980   PetscErrorCode ierr;
981 
982   PetscFunctionBegin;
983   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "MatAXPY_MPIDense"
989 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
990 {
991   PetscErrorCode ierr;
992   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
993 
994   PetscFunctionBegin;
995   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
996   PetscFunctionReturn(0);
997 }
998 
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatConjugate_MPIDense"
1001 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1002 {
1003   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatRealPart_MPIDense"
1013 PetscErrorCode MatRealPart_MPIDense(Mat A)
1014 {
1015   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1025 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1026 {
1027   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1028   PetscErrorCode ierr;
1029 
1030   PetscFunctionBegin;
1031   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1038 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1039 {
1040   PetscErrorCode ierr;
1041   PetscInt       i,n;
1042   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1043   PetscReal      *work;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1047   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1048   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1049   if (type == NORM_2) {
1050     for (i=0; i<n; i++) work[i] *= work[i];
1051   }
1052   if (type == NORM_INFINITY) {
1053     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1054   } else {
1055     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1056   }
1057   ierr = PetscFree(work);CHKERRQ(ierr);
1058   if (type == NORM_2) {
1059     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1060   }
1061   PetscFunctionReturn(0);
1062 }

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIDense"
static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;
  PetscScalar    *a;
  PetscInt       m,n,i;

  PetscFunctionBegin;
  ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
  ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
  for (i=0; i<m*n; i++) {
    ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
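
/*
   Illustrative usage sketch (not part of the PETSc sources): fill the
   matrix with random entries from a PetscRandom context.

       PetscRandom rctx;
       ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
       ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
       ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/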

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatGetSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatGetSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 99*/ 0,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0
};

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/
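
/*
   Illustrative usage sketch (not part of the PETSc sources): pick the type
   from the options database by running with -mat_type dense; on a
   communicator with more than one process this selects MATMPIDENSE.  M and
   N are placeholders for the desired global sizes.

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);
       ierr = MatSetUp(A);CHKERRQ(ierr);
*/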

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
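
/*
   Illustrative usage sketch (not part of the PETSc sources): pass NULL to
   let PETSc allocate the local storage; a user-supplied array must hold at
   least m*N scalars stored by columns (m local rows, N global columns),
   matching the sizes of the inner sequential dense matrix created above.

       ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/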

#undef __FUNCT__
#define __FUNCT__ "MatCreateDense"
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1400 
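/*
   A minimal sketch of the most common calling sequence: only the global
   sizes M and N are given, the local split is left to PETSc, and PETSc
   owns the storage (data=NULL); values are then inserted and the matrix
   assembled as usual.

     Mat A;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValue(A,0,0,1.0,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
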
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

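/*
   A minimal sketch of how this routine is reached, through the generic
   MatDuplicate() interface; the option selects whether values are copied
   along with the parallel layout.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);

   Passing MAT_DO_NOT_COPY_VALUES instead duplicates only the structure
   and layout, leaving the new matrix's values to be filled in by the caller.
*/
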
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
  else m = newmat->rmap->n;
  ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    /* buffer is reused for every process's block below, assuming no process
       owns more rows than rank 0 (true for the default row split) */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (the file is stored by rows while the
       local array is stored by columns, so we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' rows and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);

    /* insert into the matrix row by row (the file is stored by rows while the
       local array is stored by columns, so we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

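/*
   A small self-contained illustration of the index transform used above
   (a fact about column-major storage, not extra PETSc API): entry (i,j) of
   an m-by-N local block lives at offset i + j*m, so one file row, arriving
   as the contiguous values (i,0),(i,1),...,(i,N-1), must be scattered into
   the array with stride m.  Here row_vals is a hypothetical name for the
   buffer holding the row just read:

     for (j=0; j<N; j++) array[i + j*m] = row_vals[j];
*/
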
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
  int            fd = 0;            /* set only on rank 0 */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2]; nz = header[3];

  /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) {
    ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
  } else {
    ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
  }
  ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1((nz+1),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) ourlens[i] -= offlens[i];

  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  }
  ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) ourlens[i] += offlens[i];

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

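/*
   A minimal sketch of driving this loader through the generic MatLoad()
   interface; "matrix.dat" is a placeholder name for a file written
   previously with MatView() on a binary viewer.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
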
#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
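
/*
   A minimal sketch of the collective semantics implemented above: each
   process compares its local block with MatEqual() on the inner sequential
   matrices and the per-process results are combined with MPI_LAND, so *flag
   is PETSC_TRUE only when every process agrees.

     PetscBool eq;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
     if (!eq) { ierr = PetscPrintf(PETSC_COMM_WORLD,"matrices differ\n");CHKERRQ(ierr); }
*/
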