xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision fab16db7bb4a26ecb8a7466bbc83a808c3b2921c)
1 
2 /*
3    Basic functions for parallel (MPI) dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatDenseGetLocalMatrix"
12 /*@
13 
14       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that stores
15               the locally owned rows of the matrix; for a MATSEQDENSE matrix, returns the matrix itself.
16 
17     Input Parameter:
18 .      A - the Seq or MPI dense matrix
19 
20     Output Parameter:
21 .      B - the inner matrix
22 
23     Level: intermediate
24 
25 @*/
26 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
27 {
28   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
29   PetscErrorCode ierr;
30   PetscBool      flg;
31 
32   PetscFunctionBegin;
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
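/*
   Usage sketch (illustrative only; variable names are hypothetical): a caller that needs
   direct access to the locally stored entries of a MATMPIDENSE matrix A can combine this
   routine with MatDenseGetArray().

     Mat         Aloc;
     PetscScalar *array;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatDenseGetArray(Aloc,&array);CHKERRQ(ierr);
     ... array points to the column-major local storage ...
     ierr = MatDenseRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/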
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRow_MPIDense"
41 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
42 {
43   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
44   PetscErrorCode ierr;
45   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
46 
47   PetscFunctionBegin;
48   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
49   lrow = row - rstart;
50   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
51   PetscFunctionReturn(0);
52 }
53 
54 #undef __FUNCT__
55 #define __FUNCT__ "MatRestoreRow_MPIDense"
56 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57 {
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
62   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
63   PetscFunctionReturn(0);
64 }
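/*
   Usage sketch (illustrative; names are hypothetical): MatGetRow()/MatRestoreRow() on a
   MATMPIDENSE matrix may only be called for locally owned rows, so a typical loop runs
   over the ownership range.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... inspect the row ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/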
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
68 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
69 {
70   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
71   PetscErrorCode ierr;
72   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
73   PetscScalar    *array;
74   MPI_Comm       comm;
75   Mat            B;
76 
77   PetscFunctionBegin;
78   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
79 
80   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
81   if (!B) {
82     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
83     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
84     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
85     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
86     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
87     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
88     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
89     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
90     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
91     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
92     *a   = B;
93     ierr = MatDestroy(&B);CHKERRQ(ierr);
94   } else *a = B;
95   PetscFunctionReturn(0);
96 }
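/*
   Usage sketch (illustrative; Adiag is a hypothetical name): the diagonal block returned
   here is a sequential dense matrix that aliases A's local storage and stays composed on
   A, so the caller should not destroy it.

     Mat Adiag;

     ierr = MatGetDiagonalBlock(A,&Adiag);CHKERRQ(ierr);
     ... use Adiag, e.g. as the local block of a block preconditioner ...
*/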
97 
98 #undef __FUNCT__
99 #define __FUNCT__ "MatSetValues_MPIDense"
100 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
101 {
102   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
103   PetscErrorCode ierr;
104   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
105   PetscBool      roworiented = A->roworiented;
106 
107   PetscFunctionBegin;
108   if (v) PetscValidScalarPointer(v,6);
109   for (i=0; i<m; i++) {
110     if (idxm[i] < 0) continue;
111     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
112     if (idxm[i] >= rstart && idxm[i] < rend) {
113       row = idxm[i] - rstart;
114       if (roworiented) {
115         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
116       } else {
117         for (j=0; j<n; j++) {
118           if (idxn[j] < 0) continue;
119           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
121         }
122       }
123     } else if (!A->donotstash) {
124       mat->assembled = PETSC_FALSE;
125       if (roworiented) {
126         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
127       } else {
128         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
129       }
130     }
131   }
132   PetscFunctionReturn(0);
133 }
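/*
   Usage sketch (illustrative; the indices below are hypothetical): off-process entries
   handed to MatSetValues() are stashed by the code above and communicated during
   assembly, so a caller may set any global (row,column) pair and then assemble.

     PetscInt    i = 0,j = 5;
     PetscScalar v = 1.0;

     ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/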
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatGetValues_MPIDense"
137 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
138 {
139   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
140   PetscErrorCode ierr;
141   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
142 
143   PetscFunctionBegin;
144   for (i=0; i<m; i++) {
145     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
146     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147     if (idxm[i] >= rstart && idxm[i] < rend) {
148       row = idxm[i] - rstart;
149       for (j=0; j<n; j++) {
150         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
151         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
152         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
153       }
154     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "MatDenseGetArray_MPIDense"
161 PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
173 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
174 {
175   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
176   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
177   PetscErrorCode ierr;
178   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
179   const PetscInt *irow,*icol;
180   PetscScalar    *av,*bv,*v = lmat->v;
181   Mat            newmat;
182   IS             iscol_local;
183 
184   PetscFunctionBegin;
185   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
186   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
187   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
188   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
189   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
190   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
191 
192   /* No parallel redistribution is currently supported! We should really check each index set
193      to confirm that it is OK.  Currently this supports only submatrices with the same row
194      partitioning as the original matrix! */
195 
196   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
197   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
198 
199   /* Check submatrix call */
200   if (scall == MAT_REUSE_MATRIX) {
201     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
202     /* Really need to test rows and column sizes! */
203     newmat = *B;
204   } else {
205     /* Create and fill new matrix */
206     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
207     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
208     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
209     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
210   }
211 
212   /* Now extract the data pointers and do the copy, column at a time */
213   newmatd = (Mat_MPIDense*)newmat->data;
214   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
215 
216   for (i=0; i<Ncols; i++) {
217     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
218     for (j=0; j<nrows; j++) {
219       *bv++ = av[irow[j] - rstart];
220     }
221   }
222 
223   /* Assemble the matrices so that the correct flags are set */
224   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
225   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
226 
227   /* Free work space */
228   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
229   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
230   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
231   *B   = newmat;
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatDenseRestoreArray_MPIDense"
237 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
238 {
239   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
259   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
260   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
261   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   mat->insertmode = addv; /* in case this processor had no cache */
263 
264   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
265   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
266   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
272 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
273 {
274   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
275   PetscErrorCode ierr;
276   PetscInt       i,*row,*col,flg,j,rstart,ncols;
277   PetscMPIInt    n;
278   PetscScalar    *val;
279   InsertMode     addv=mat->insertmode;
280 
281   PetscFunctionBegin;
282   /*  wait on receives */
283   while (1) {
284     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
285     if (!flg) break;
286 
287     for (i=0; i<n;) {
288       /* Now identify the consecutive vals belonging to the same row */
289       for (j=i,rstart=row[j]; j<n; j++) {
290         if (row[j] != rstart) break;
291       }
292       if (j < n) ncols = j-i;
293       else       ncols = n-i;
294       /* Now assemble all these values with a single function call */
295       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
296       i    = j;
297     }
298   }
299   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
300 
301   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
302   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
303 
304   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
305     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
306   }
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatZeroEntries_MPIDense"
312 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
313 {
314   PetscErrorCode ierr;
315   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
316 
317   PetscFunctionBegin;
318   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 /* The code does not handle the diagonal entries correctly unless the
323    matrix is square and the column and row ownerships are identical.
324    This is a BUG. The only way to fix it seems to be to access
325    the local matrix mdn->A directly rather than going through the
326    MatZeroRows() routine.
327 */
328 #undef __FUNCT__
329 #define __FUNCT__ "MatZeroRows_MPIDense"
330 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
331 {
332   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
333   PetscErrorCode    ierr;
334   PetscInt          i,*owners = A->rmap->range;
335   PetscInt          *sizes,j,idx,nsends;
336   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
337   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
338   PetscInt          *lens,*lrows,*values;
339   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
340   MPI_Comm          comm;
341   MPI_Request       *send_waits,*recv_waits;
342   MPI_Status        recv_status,*send_status;
343   PetscBool         found;
344   const PetscScalar *xx;
345   PetscScalar       *bb;
346 
347   PetscFunctionBegin;
348   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
349   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
350   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
351   /*  first count number of contributors to each processor */
352   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
353   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
354   for (i=0; i<N; i++) {
355     idx   = rows[i];
356     found = PETSC_FALSE;
357     for (j=0; j<size; j++) {
358       if (idx >= owners[j] && idx < owners[j+1]) {
359         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
360       }
361     }
362     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
363   }
364   nsends = 0;
365   for (i=0; i<size; i++) nsends += sizes[2*i+1];
366 
367   /* inform other processors of number of messages and max length*/
368   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
369 
370   /* post receives:   */
371   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
372   ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
373   for (i=0; i<nrecvs; i++) {
374     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
375   }
376 
377   /* do sends:
378       1) starts[i] gives the starting index in svalues for stuff going to
379          the ith processor
380   */
381   ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
382   ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
383   ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);
384 
385   starts[0] = 0;
386   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
387   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
388 
389   starts[0] = 0;
390   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
391   count = 0;
392   for (i=0; i<size; i++) {
393     if (sizes[2*i+1]) {
394       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
395     }
396   }
397   ierr = PetscFree(starts);CHKERRQ(ierr);
398 
399   base = owners[rank];
400 
401   /*  wait on receives */
402   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
403   count = nrecvs;
404   slen  = 0;
405   while (count) {
406     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
407     /* unpack receives into our local space */
408     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
409 
410     source[imdex] = recv_status.MPI_SOURCE;
411     lens[imdex]   = n;
412     slen += n;
413     count--;
414   }
415   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
416 
417   /* move the data into the send scatter */
418   ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
419   count = 0;
420   for (i=0; i<nrecvs; i++) {
421     values = rvalues + i*nmax;
422     for (j=0; j<lens[i]; j++) {
423       lrows[count++] = values[j] - base;
424     }
425   }
426   ierr = PetscFree(rvalues);CHKERRQ(ierr);
427   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
428   ierr = PetscFree(owner);CHKERRQ(ierr);
429   ierr = PetscFree(sizes);CHKERRQ(ierr);
430 
431   /* fix right hand side if needed */
432   if (x && b) {
433     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
434     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
435     for (i=0; i<slen; i++) {
436       bb[lrows[i]] = diag*xx[lrows[i]];
437     }
438     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
439     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
440   }
441 
442   /* actually zap the local rows */
443   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
444   if (diag != 0.0) {
445     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
446     PetscInt     m   = ll->lda, i;
447 
448     for (i=0; i<slen; i++) {
449       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
450     }
451   }
452   ierr = PetscFree(lrows);CHKERRQ(ierr);
453 
454   /* wait on sends */
455   if (nsends) {
456     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
457     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
458     ierr = PetscFree(send_status);CHKERRQ(ierr);
459   }
460   ierr = PetscFree(send_waits);CHKERRQ(ierr);
461   ierr = PetscFree(svalues);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
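/*
   Usage sketch (illustrative; rows, xfixed and b are hypothetical names): zero two global
   rows, put 1.0 on their diagonal entries, and adjust the right-hand side b so those
   equations enforce the values in xfixed.  As noted above, this implementation requires
   matching row and column ownership.

     PetscInt rows[2] = {0,3};

     ierr = MatZeroRows(A,2,rows,1.0,xfixed,b);CHKERRQ(ierr);
*/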
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "MatMult_MPIDense"
467 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
468 {
469   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
470   PetscErrorCode ierr;
471 
472   PetscFunctionBegin;
473   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
474   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
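/*
   Usage sketch (illustrative; all names are hypothetical): MatMult() expects x to be
   distributed like the columns of the matrix and y like its rows; the local sizes below
   come from MatGetLocalSize().

     Vec      x,y;
     PetscInt m,n;

     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = VecCreateMPI(PetscObjectComm((PetscObject)A),n,PETSC_DETERMINE,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(PetscObjectComm((PetscObject)A),m,PETSC_DETERMINE,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/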
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatMultAdd_MPIDense"
481 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
482 {
483   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
484   PetscErrorCode ierr;
485 
486   PetscFunctionBegin;
487   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatMultTranspose_MPIDense"
495 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
496 {
497   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
498   PetscErrorCode ierr;
499   PetscScalar    zero = 0.0;
500 
501   PetscFunctionBegin;
502   ierr = VecSet(yy,zero);CHKERRQ(ierr);
503   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
504   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
505   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
511 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
512 {
513   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
514   PetscErrorCode ierr;
515 
516   PetscFunctionBegin;
517   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
518   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
519   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
520   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatGetDiagonal_MPIDense"
526 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
527 {
528   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
529   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
530   PetscErrorCode ierr;
531   PetscInt       len,i,n,m = A->rmap->n,radd;
532   PetscScalar    *x,zero = 0.0;
533 
534   PetscFunctionBegin;
535   ierr = VecSet(v,zero);CHKERRQ(ierr);
536   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
537   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
538   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
539   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
540   radd = A->rmap->rstart*m;
541   for (i=0; i<len; i++) {
542     x[i] = aloc->v[radd + i*m + i];
543   }
544   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatDestroy_MPIDense"
550 PetscErrorCode MatDestroy_MPIDense(Mat mat)
551 {
552   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556 #if defined(PETSC_USE_LOG)
557   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
558 #endif
559   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
560   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
561   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
562   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
563 
564   ierr = PetscFree(mat->data);CHKERRQ(ierr);
565   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
567   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
568   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
569   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
570   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatView_MPIDense_Binary"
576 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
577 {
578   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
579   PetscErrorCode    ierr;
580   PetscViewerFormat format;
581   int               fd;
582   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
583   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
584   PetscScalar       *work,*v,*vv;
585   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
586 
587   PetscFunctionBegin;
588   if (mdn->size == 1) {
589     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
590   } else {
591     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
592     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
593     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
594 
595     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596     if (format == PETSC_VIEWER_NATIVE) {
597 
598       if (!rank) {
599         /* store the matrix as a dense matrix */
600         header[0] = MAT_FILE_CLASSID;
601         header[1] = mat->rmap->N;
602         header[2] = N;
603         header[3] = MATRIX_BINARY_FORMAT_DENSE;
604         ierr      = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
605 
606         /* get largest work array needed for transposing array */
607         mmax = mat->rmap->n;
608         for (i=1; i<size; i++) {
609           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
610         }
611         ierr = PetscMalloc1(mmax*N,&work);CHKERRQ(ierr);
612 
613         /* write out local array, by rows */
614         m = mat->rmap->n;
615         v = a->v;
616         for (j=0; j<N; j++) {
617           for (i=0; i<m; i++) {
618             work[j + i*N] = *v++;
619           }
620         }
621         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
622         /* get largest work array to receive messages from other processes, excludes process zero */
623         mmax = 0;
624         for (i=1; i<size; i++) {
625           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
626         }
627         ierr = PetscMalloc1(mmax*N,&vv);CHKERRQ(ierr);
628         for (k = 1; k < size; k++) {
629           v    = vv;
630           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
631           ierr = MPIULong_Recv(v,m*N,MPIU_SCALAR,k,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632 
633           for (j = 0; j < N; j++) {
634             for (i = 0; i < m; i++) {
635               work[j + i*N] = *v++;
636             }
637           }
638           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
639         }
640         ierr = PetscFree(work);CHKERRQ(ierr);
641         ierr = PetscFree(vv);CHKERRQ(ierr);
642       } else {
643         ierr = MPIULong_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
644       }
645     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 extern PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
651 #include <petscdraw.h>
652 #undef __FUNCT__
653 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
654 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
655 {
656   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
657   PetscErrorCode    ierr;
658   PetscMPIInt       rank = mdn->rank;
659   PetscViewerType   vtype;
660   PetscBool         iascii,isdraw;
661   PetscViewer       sviewer;
662   PetscViewerFormat format;
663 
664   PetscFunctionBegin;
665   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
674       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
677       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
678       PetscFunctionReturn(0);
679     } else if (format == PETSC_VIEWER_ASCII_INFO) {
680       PetscFunctionReturn(0);
681     }
682   } else if (isdraw) {
683     PetscDraw draw;
684     PetscBool isnull;
685 
686     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
687     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
688     if (isnull) PetscFunctionReturn(0);
689   }
690 
691   {
692     /* assemble the entire matrix onto first processor. */
693     Mat         A;
694     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
695     PetscInt    *cols;
696     PetscScalar *vals;
697 
698     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
699     if (!rank) {
700       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
701     } else {
702       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
703     }
704     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
705     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
706     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
707     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
708 
709     /* Copy the matrix ... This isn't the most efficient means,
710        but it's quick for now */
711     A->insertmode = INSERT_VALUES;
712 
713     row = mat->rmap->rstart;
714     m   = mdn->A->rmap->n;
715     for (i=0; i<m; i++) {
716       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
717       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
718       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
719       row++;
720     }
721 
722     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
723     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
724     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     if (!rank) {
726         ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
727     }
728     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
729     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
730     ierr = MatDestroy(&A);CHKERRQ(ierr);
731   }
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatView_MPIDense"
737 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
738 {
739   PetscErrorCode ierr;
740   PetscBool      iascii,isbinary,isdraw,issocket;
741 
742   PetscFunctionBegin;
743   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
744   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
745   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
746   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
747 
748   if (iascii || issocket || isdraw) {
749     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
750   } else if (isbinary) {
751     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatGetInfo_MPIDense"
758 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
759 {
760   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
761   Mat            mdn  = mat->A;
762   PetscErrorCode ierr;
763   PetscReal      isend[5],irecv[5];
764 
765   PetscFunctionBegin;
766   info->block_size = 1.0;
767 
768   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
769 
770   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
771   isend[3] = info->memory;  isend[4] = info->mallocs;
772   if (flag == MAT_LOCAL) {
773     info->nz_used      = isend[0];
774     info->nz_allocated = isend[1];
775     info->nz_unneeded  = isend[2];
776     info->memory       = isend[3];
777     info->mallocs      = isend[4];
778   } else if (flag == MAT_GLOBAL_MAX) {
779     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
780 
781     info->nz_used      = irecv[0];
782     info->nz_allocated = irecv[1];
783     info->nz_unneeded  = irecv[2];
784     info->memory       = irecv[3];
785     info->mallocs      = irecv[4];
786   } else if (flag == MAT_GLOBAL_SUM) {
787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
788 
789     info->nz_used      = irecv[0];
790     info->nz_allocated = irecv[1];
791     info->nz_unneeded  = irecv[2];
792     info->memory       = irecv[3];
793     info->mallocs      = irecv[4];
794   }
795   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
796   info->fill_ratio_needed = 0;
797   info->factor_mallocs    = 0;
798   PetscFunctionReturn(0);
799 }
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatSetOption_MPIDense"
803 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
804 {
805   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
806   PetscErrorCode ierr;
807 
808   PetscFunctionBegin;
809   switch (op) {
810   case MAT_NEW_NONZERO_LOCATIONS:
811   case MAT_NEW_NONZERO_LOCATION_ERR:
812   case MAT_NEW_NONZERO_ALLOCATION_ERR:
813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
814     break;
815   case MAT_ROW_ORIENTED:
816     a->roworiented = flg;
817 
818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
819     break;
820   case MAT_NEW_DIAGONALS:
821   case MAT_KEEP_NONZERO_PATTERN:
822   case MAT_USE_HASH_TABLE:
823     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
824     break;
825   case MAT_IGNORE_OFF_PROC_ENTRIES:
826     a->donotstash = flg;
827     break;
828   case MAT_SYMMETRIC:
829   case MAT_STRUCTURALLY_SYMMETRIC:
830   case MAT_HERMITIAN:
831   case MAT_SYMMETRY_ETERNAL:
832   case MAT_IGNORE_LOWER_TRIANGULAR:
833     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
834     break;
835   default:
836     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatDiagonalScale_MPIDense"
844 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
845 {
846   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
847   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
848   PetscScalar    *l,*r,x,*v;
849   PetscErrorCode ierr;
850   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
851 
852   PetscFunctionBegin;
853   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
854   if (ll) {
855     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
856     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
857     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
858     for (i=0; i<m; i++) {
859       x = l[i];
860       v = mat->v + i;
861       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
862     }
863     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
864     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
865   }
866   if (rr) {
867     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
868     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
869     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
872     for (i=0; i<n; i++) {
873       x = r[i];
874       v = mat->v + i*m;
875       for (j=0; j<m; j++) (*v++) *= x;
876     }
877     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
878     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
879   }
880   PetscFunctionReturn(0);
881 }
882 
883 #undef __FUNCT__
884 #define __FUNCT__ "MatNorm_MPIDense"
885 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
886 {
887   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
888   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
889   PetscErrorCode ierr;
890   PetscInt       i,j;
891   PetscReal      sum = 0.0;
892   PetscScalar    *v  = mat->v;
893 
894   PetscFunctionBegin;
895   if (mdn->size == 1) {
896     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
897   } else {
898     if (type == NORM_FROBENIUS) {
899       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
900         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
901       }
902       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
903       *nrm = PetscSqrtReal(*nrm);
904       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
905     } else if (type == NORM_1) {
906       PetscReal *tmp,*tmp2;
907       ierr = PetscMalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
908       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
909       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
910       *nrm = 0.0;
911       v    = mat->v;
912       for (j=0; j<mdn->A->cmap->n; j++) {
913         for (i=0; i<mdn->A->rmap->n; i++) {
914           tmp[j] += PetscAbsScalar(*v);  v++;
915         }
916       }
917       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
918       for (j=0; j<A->cmap->N; j++) {
919         if (tmp2[j] > *nrm) *nrm = tmp2[j];
920       }
921       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
922       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
923     } else if (type == NORM_INFINITY) { /* max row norm */
924       PetscReal ntemp;
925       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
926       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
927     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatTranspose_MPIDense"
934 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
935 {
936   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
937   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
938   Mat            B;
939   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
940   PetscErrorCode ierr;
941   PetscInt       j,i;
942   PetscScalar    *v;
943 
944   PetscFunctionBegin;
945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
946   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
947     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
948     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
949     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
950     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
951   } else {
952     B = *matout;
953   }
954 
955   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
956   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
957   for (i=0; i<m; i++) rwork[i] = rstart + i;
958   for (j=0; j<n; j++) {
959     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
960     v   += m;
961   }
962   ierr = PetscFree(rwork);CHKERRQ(ierr);
963   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
964   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
965   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
966     *matout = B;
967   } else {
968     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
969   }
970   PetscFunctionReturn(0);
971 }
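/*
   Usage sketch (illustrative; At is a hypothetical name): an out-of-place transpose always
   works; the in-place form (MAT_REUSE_MATRIX with matout pointing back at A) is supported
   only for square matrices, as checked above.

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/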
972 
973 
974 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
975 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatSetUp_MPIDense"
979 PetscErrorCode MatSetUp_MPIDense(Mat A)
980 {
981   PetscErrorCode ierr;
982 
983   PetscFunctionBegin;
984   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
985   PetscFunctionReturn(0);
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "MatAXPY_MPIDense"
990 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
991 {
992   PetscErrorCode ierr;
993   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
994 
995   PetscFunctionBegin;
996   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatConjugate_MPIDense"
1002 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1003 {
1004   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatRealPart_MPIDense"
1014 PetscErrorCode MatRealPart_MPIDense(Mat A)
1015 {
1016   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1026 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1027 {
1028   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1029   PetscErrorCode ierr;
1030 
1031   PetscFunctionBegin;
1032   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 extern PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatGetColumnNorms_MPIDense"
1039 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1040 {
1041   PetscErrorCode ierr;
1042   PetscInt       i,n;
1043   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1044   PetscReal      *work;
1045 
1046   PetscFunctionBegin;
1047   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1048   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1049   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1050   if (type == NORM_2) {
1051     for (i=0; i<n; i++) work[i] *= work[i];
1052   }
1053   if (type == NORM_INFINITY) {
1054     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1055   } else {
1056     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1057   }
1058   ierr = PetscFree(work);CHKERRQ(ierr);
1059   if (type == NORM_2) {
1060     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatSetRandom_MPIDense"
1067 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1068 {
1069   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1070   PetscErrorCode ierr;
1071   PetscScalar    *a;
1072   PetscInt       m,n,i;
1073 
1074   PetscFunctionBegin;
1075   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1076   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1077   for (i=0; i<m*n; i++) {
1078     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1079   }
1080   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 /* -------------------------------------------------------------------*/
1085 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1086                                         MatGetRow_MPIDense,
1087                                         MatRestoreRow_MPIDense,
1088                                         MatMult_MPIDense,
1089                                 /*  4*/ MatMultAdd_MPIDense,
1090                                         MatMultTranspose_MPIDense,
1091                                         MatMultTransposeAdd_MPIDense,
1092                                         0,
1093                                         0,
1094                                         0,
1095                                 /* 10*/ 0,
1096                                         0,
1097                                         0,
1098                                         0,
1099                                         MatTranspose_MPIDense,
1100                                 /* 15*/ MatGetInfo_MPIDense,
1101                                         MatEqual_MPIDense,
1102                                         MatGetDiagonal_MPIDense,
1103                                         MatDiagonalScale_MPIDense,
1104                                         MatNorm_MPIDense,
1105                                 /* 20*/ MatAssemblyBegin_MPIDense,
1106                                         MatAssemblyEnd_MPIDense,
1107                                         MatSetOption_MPIDense,
1108                                         MatZeroEntries_MPIDense,
1109                                 /* 24*/ MatZeroRows_MPIDense,
1110                                         0,
1111                                         0,
1112                                         0,
1113                                         0,
1114                                 /* 29*/ MatSetUp_MPIDense,
1115                                         0,
1116                                         0,
1117                                         0,
1118                                         0,
1119                                 /* 34*/ MatDuplicate_MPIDense,
1120                                         0,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                 /* 39*/ MatAXPY_MPIDense,
1125                                         MatGetSubMatrices_MPIDense,
1126                                         0,
1127                                         MatGetValues_MPIDense,
1128                                         0,
1129                                 /* 44*/ 0,
1130                                         MatScale_MPIDense,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                 /* 49*/ MatSetRandom_MPIDense,
1135                                         0,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                 /* 54*/ 0,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                 /* 59*/ MatGetSubMatrix_MPIDense,
1145                                         MatDestroy_MPIDense,
1146                                         MatView_MPIDense,
1147                                         0,
1148                                         0,
1149                                 /* 64*/ 0,
1150                                         0,
1151                                         0,
1152                                         0,
1153                                         0,
1154                                 /* 69*/ 0,
1155                                         0,
1156                                         0,
1157                                         0,
1158                                         0,
1159                                 /* 74*/ 0,
1160                                         0,
1161                                         0,
1162                                         0,
1163                                         0,
1164                                 /* 79*/ 0,
1165                                         0,
1166                                         0,
1167                                         0,
1168                                 /* 83*/ MatLoad_MPIDense,
1169                                         0,
1170                                         0,
1171                                         0,
1172                                         0,
1173                                         0,
1174                                 /* 89*/
1175                                         0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                 /* 94*/ 0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /* 99*/ 0,
1186                                         0,
1187                                         0,
1188                                         MatConjugate_MPIDense,
1189                                         0,
1190                                 /*104*/ 0,
1191                                         MatRealPart_MPIDense,
1192                                         MatImaginaryPart_MPIDense,
1193                                         0,
1194                                         0,
1195                                 /*109*/ 0,
1196                                         0,
1197                                         0,
1198                                         0,
1199                                         0,
1200                                 /*114*/ 0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                 /*119*/ 0,
1206                                         0,
1207                                         0,
1208                                         0,
1209                                         0,
1210                                 /*124*/ 0,
1211                                         MatGetColumnNorms_MPIDense,
1212                                         0,
1213                                         0,
1214                                         0,
1215                                 /*129*/ 0,
1216                                         0,
1217                                         0,
1218                                         0,
1219                                         0,
1220                                 /*134*/ 0,
1221                                         0,
1222                                         0,
1223                                         0,
1224                                         0,
1225                                 /*139*/ 0,
1226                                         0,
1227                                         0
1228 };
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1232 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1233 {
1234   Mat_MPIDense   *a;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   mat->preallocated = PETSC_TRUE;
1239   /* Note:  For now, when a data array is provided, this assumes the user correctly
1240    allocated the local dense storage space.  We should add error checking. */
1241 
1242   a       = (Mat_MPIDense*)mat->data;
1243   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1244   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1245   a->nvec = mat->cmap->n;
1246 
1247   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1248   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1249   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1250   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1251   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatCreate_MPIDense"
1257 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1258 {
1259   Mat_MPIDense   *a;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1264   mat->data = (void*)a;
1265   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1266 
1267   mat->insertmode = NOT_SET_VALUES;
1268   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1269   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1270 
1271   /* build cache for off array entries formed */
1272   a->donotstash = PETSC_FALSE;
1273 
1274   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1275 
1276   /* stuff used for matrix vector multiply */
1277   a->lvec        = 0;
1278   a->Mvctx       = 0;
1279   a->roworiented = PETSC_TRUE;
1280 
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1283 
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1289 
1290   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 /*MC
1298    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1299 
1300    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1301    and MATMPIDENSE otherwise.
1302 
1303    Options Database Keys:
1304 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1305 
1306   Level: beginner
1307 
1308 
1309 .seealso: MatCreateDense(), MATSEQDENSE, MATMPIDENSE
1310 M*/
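/*
   Illustrative sketch of selecting the "dense" type from the options database; the names
   comm, A, M and N are hypothetical.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSetUp(A);CHKERRQ(ierr);

   Running with -mat_type dense then selects MATSEQDENSE or MATMPIDENSE as appropriate.
*/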
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1314 /*@C
1315    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1316 
1317    Not collective
1318 
1319    Input Parameters:
1320 +  A - the matrix
1321 -  data - optional location of matrix data.  Set data=NULL for PETSc
1322    to control all matrix memory allocation.
1323 
1324    Notes:
1325    The dense format is fully compatible with standard Fortran 77
1326    storage by columns.
1327 
1328    The data input variable is intended primarily for Fortran programmers
1329    who wish to allocate their own matrix memory space.  Most users should
1330    set data=NULL.
1331 
1332    Level: intermediate
1333 
1334 .keywords: matrix,dense, parallel
1335 
1336 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1337 @*/
1338 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1339 {
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(mat,data));CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
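/*
   Usage sketch (illustrative; comm, A, m, n, M and N are hypothetical names): pass NULL
   to let PETSc allocate the storage, or pass a user array of size (local rows) x
   (global columns) stored column by column.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
*/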
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatCreateDense"
1349 /*@C
1350    MatCreateDense - Creates a parallel matrix in dense format.
1351 
1352    Collective on MPI_Comm
1353 
1354    Input Parameters:
1355 +  comm - MPI communicator
1356 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1357 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1358 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1359 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1360 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1361    to control all matrix memory allocation.
1362 
1363    Output Parameter:
1364 .  A - the matrix
1365 
1366    Notes:
1367    The dense format is fully compatible with standard Fortran 77
1368    storage by columns.
1369 
1370    The data input variable is intended primarily for Fortran programmers
1371    who wish to allocate their own matrix memory space.  Most users should
1372    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1373 
1374    The user MUST specify either the local or global matrix dimensions
1375    (possibly both).
1376 
1377    Level: intermediate
1378 
1379 .keywords: matrix,dense, parallel
1380 
1381 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1382 @*/
1383 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1384 {
1385   PetscErrorCode ierr;
1386   PetscMPIInt    size;
1387 
1388   PetscFunctionBegin;
1389   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1390   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1391   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1392   if (size > 1) {
1393     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1394     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1395   } else {
1396     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1397     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
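/*
   Illustrative sketch (not part of the PETSc source): creating a parallel dense
   matrix with MatCreateDense() above, letting PETSc allocate the storage
   (data=NULL), and filling it with MatSetValues().  The dimension N and the
   diagonal values are placeholders.

     Mat            C;
     PetscInt       i,rstart,rend,N = 20;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&C);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {          /* set the diagonal of the locally owned rows */
       ierr = MatSetValues(C,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
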
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatDuplicate_MPIDense"
1404 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1405 {
1406   Mat            mat;
1407   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   *newmat = 0;
1412   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1413   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1414   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1415   a       = (Mat_MPIDense*)mat->data;
1416   ierr    = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1417 
1418   mat->factortype   = A->factortype;
1419   mat->assembled    = PETSC_TRUE;
1420   mat->preallocated = PETSC_TRUE;
1421 
1422   a->size         = oldmat->size;
1423   a->rank         = oldmat->rank;
1424   mat->insertmode = NOT_SET_VALUES;
1425   a->nvec         = oldmat->nvec;
1426   a->donotstash   = oldmat->donotstash;
1427 
1428   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1429   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1430 
1431   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1432   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1433   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1434 
1435   *newmat = mat;
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1441 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1442 {
1443   PetscErrorCode ierr;
1444   PetscMPIInt    rank,size;
1445   PetscInt       *rowners,i,m,nz,j;
1446   PetscScalar    *array,*vals,*vals_ptr;
1447 
1448   PetscFunctionBegin;
1449   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1450   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1451 
1452   /* determine ownership of all rows */
1453   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1454   else m = newmat->rmap->n;
1455   ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
1456   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1457   rowners[0] = 0;
1458   for (i=2; i<=size; i++) {
1459     rowners[i] += rowners[i-1];
1460   }
1461 
1462   if (!sizesset) {
1463     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1464   }
1465   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1466   ierr = MatDenseGetArray(newmat,&array);CHKERRQ(ierr);
1467 
1468   if (!rank) {
1469     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1470 
1471     /* read in my part of the matrix numerical values  */
1472     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1473 
1474     /* insert into matrix by row: the file stores values row by row, but the dense array is column major, so we cannot read directly into array */
1475     vals_ptr = vals;
1476     for (i=0; i<m; i++) {
1477       for (j=0; j<N; j++) {
1478         array[i + j*m] = *vals_ptr++;
1479       }
1480     }
1481 
1482     /* read in the other processes' rows and ship them out */
1483     for (i=1; i<size; i++) {
1484       nz   = (rowners[i+1] - rowners[i])*N;
1485       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1486       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1487     }
1488   } else {
1489     /* receive numeric values */
1490     ierr = PetscMalloc1(m*N,&vals);CHKERRQ(ierr);
1491 
1492     /* receive message of values */
1493     ierr = MPIULong_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1494 
1495     /* insert into matrix by row: the file stores values row by row, but the dense array is column major, so we cannot read directly into array */
1496     vals_ptr = vals;
1497     for (i=0; i<m; i++) {
1498       for (j=0; j<N; j++) {
1499         array[i + j*m] = *vals_ptr++;
1500       }
1501     }
1502   }
1503   ierr = MatDenseRestoreArray(newmat,&array);CHKERRQ(ierr);
1504   ierr = PetscFree(rowners);CHKERRQ(ierr);
1505   ierr = PetscFree(vals);CHKERRQ(ierr);
1506   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1507   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatLoad_MPIDense"
1513 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1514 {
1515   PetscScalar    *vals,*svals;
1516   MPI_Comm       comm;
1517   MPI_Status     status;
1518   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1519   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1520   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1521   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1522   int            fd;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
1527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1528   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1529   if (!rank) {
1530     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1531     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
1532     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1533   }
1534   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1535 
1536   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1537   M    = header[1]; N = header[2]; nz = header[3];
1538 
1539   /* If global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
1540   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1541   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1542 
1543   /* If global sizes are set, check that they are consistent with those given in the file */
1544   if (sizesset) {
1545     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1546   }
1547   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
1548   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
1549 
1550   /*
1551        Handle case where matrix is stored on disk as a dense matrix
1552   */
1553   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1554     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1555     PetscFunctionReturn(0);
1556   }
1557 
1558   /* determine ownership of all rows */
1559   if (newmat->rmap->n < 0) {
1560     ierr = PetscMPIIntCast(M/size + ((M % size) > rank),&m);CHKERRQ(ierr);
1561   } else {
1562     ierr = PetscMPIIntCast(newmat->rmap->n,&m);CHKERRQ(ierr);
1563   }
1564   ierr       = PetscMalloc1((size+2),&rowners);CHKERRQ(ierr);
1565   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1566   rowners[0] = 0;
1567   for (i=2; i<=size; i++) {
1568     rowners[i] += rowners[i-1];
1569   }
1570   rstart = rowners[rank];
1571   rend   = rowners[rank+1];
1572 
1573   /* distribute row lengths to all processors */
1574   ierr = PetscMalloc2(rend-rstart,&ourlens,rend-rstart,&offlens);CHKERRQ(ierr);
1575   if (!rank) {
1576     ierr = PetscMalloc1(M,&rowlengths);CHKERRQ(ierr);
1577     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1578     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
1579     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1580     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1581     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1582   } else {
1583     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1584   }
1585 
1586   if (!rank) {
1587     /* calculate the number of nonzeros on each processor */
1588     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
1589     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1590     for (i=0; i<size; i++) {
1591       for (j=rowners[i]; j< rowners[i+1]; j++) {
1592         procsnz[i] += rowlengths[j];
1593       }
1594     }
1595     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1596 
1597     /* determine max buffer needed and allocate it */
1598     maxnz = 0;
1599     for (i=0; i<size; i++) {
1600       maxnz = PetscMax(maxnz,procsnz[i]);
1601     }
1602     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
1603 
1604     /* read in my part of the matrix column indices  */
1605     nz   = procsnz[0];
1606     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
1607     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1608 
1609     /* read in everyone else's column indices and ship them off */
1610     for (i=1; i<size; i++) {
1611       nz   = procsnz[i];
1612       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1613       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1614     }
1615     ierr = PetscFree(cols);CHKERRQ(ierr);
1616   } else {
1617     /* determine buffer space needed for message */
1618     nz = 0;
1619     for (i=0; i<m; i++) {
1620       nz += ourlens[i];
1621     }
1622     ierr = PetscMalloc1((nz+1),&mycols);CHKERRQ(ierr);
1623 
1624     /* receive message of column indices*/
1625     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1626     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1627     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: number of column indices received does not match expected count");
1628   }
1629 
1630   /* loop over local rows, determining number of off-diagonal entries */
1631   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1632   jj   = 0;
1633   for (i=0; i<m; i++) {
1634     for (j=0; j<ourlens[i]; j++) {
1635       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1636       jj++;
1637     }
1638   }
1639 
1640   /* create our matrix */
1641   for (i=0; i<m; i++) ourlens[i] -= offlens[i];
1642 
1643   if (!sizesset) {
1644     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1645   }
1646   ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
1647   for (i=0; i<m; i++) ourlens[i] += offlens[i];
1648 
1649   if (!rank) {
1650     ierr = PetscMalloc1(maxnz,&vals);CHKERRQ(ierr);
1651 
1652     /* read in my part of the matrix numerical values  */
1653     nz   = procsnz[0];
1654     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1655 
1656     /* insert into matrix */
1657     jj      = rstart;
1658     smycols = mycols;
1659     svals   = vals;
1660     for (i=0; i<m; i++) {
1661       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1662       smycols += ourlens[i];
1663       svals   += ourlens[i];
1664       jj++;
1665     }
1666 
1667     /* read in the other processes' values and ship them out */
1668     for (i=1; i<size; i++) {
1669       nz   = procsnz[i];
1670       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1671       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
1672     }
1673     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1674   } else {
1675     /* receive numeric values */
1676     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
1677 
1678     /* receive message of values */
1679     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
1680     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1681     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: number of values received does not match expected count");
1682 
1683     /* insert into matrix */
1684     jj      = rstart;
1685     smycols = mycols;
1686     svals   = vals;
1687     for (i=0; i<m; i++) {
1688       ierr     = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1689       smycols += ourlens[i];
1690       svals   += ourlens[i];
1691       jj++;
1692     }
1693   }
1694   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
1695   ierr = PetscFree(vals);CHKERRQ(ierr);
1696   ierr = PetscFree(mycols);CHKERRQ(ierr);
1697   ierr = PetscFree(rowners);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
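/*
   Illustrative sketch (not part of the PETSc source): loading a matrix stored in
   PETSc binary format into a dense matrix with MatLoad(), which dispatches to
   MatLoad_MPIDense() above when the communicator has more than one process.
   The file name is a placeholder.

     Mat            D;
     PetscViewer    viewer;
     PetscErrorCode ierr;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&D);CHKERRQ(ierr);
     ierr = MatSetType(D,MATDENSE);CHKERRQ(ierr);
     ierr = MatLoad(D,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
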
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatEqual_MPIDense"
1706 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1707 {
1708   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1709   Mat            a,b;
1710   PetscBool      flg;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   a    = matA->A;
1715   b    = matB->A;
1716   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1717   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1718   PetscFunctionReturn(0);
1719 }
1720 
1721