#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For a sequential matrix it returns the matrix itself.

    Input Parameter:
.      A - the sequential (MATSEQDENSE) or parallel (MATMPIDENSE) dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) {  /* this check is fragile; there is no direct way to query for "any parallel dense" type */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
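
/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (assumes a dense matrix
   A created elsewhere; error handling via the usual ierr/CHKERRQ pattern):

       Mat Aloc;
       ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);

   Aloc is the per-process MATSEQDENSE storage owned by A; it must not be
   destroyed separately.
*/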

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
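
/*
   Callers reach the two routines above through MatGetRow()/MatRestoreRow().
   A sketch of the expected pattern (only locally owned rows may be requested,
   as the error check above enforces):

       PetscInt          row,nz,rstart,rend;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
         ... use the nz entries of cols[] and vals[] here ...
         ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       }
*/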

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! We should really check each index set
     to confirm that it is OK. Currently this routine supports only submatrices with the
     same row partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to check the row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
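
/*
   Because of the restriction noted above, a safe way to reach this routine is
   with a row index set whose local piece matches the row ownership of A. A
   sketch (rstart, rend, and ncols are assumed to come from the caller):

       IS  isrow,iscol;
       Mat B;
       ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(((PetscObject)A)->comm,ncols,0,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/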

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
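
/*
   The begin/end pair above implements the standard PETSc assembly protocol:
   values set with MatSetValues() for rows owned by another process are held
   in the stash and routed to their owners during assembly. A typical calling
   sequence (sketch; i, j, and v are whatever the application computed):

       ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/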

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code below does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly rather than going through the
   MatZeroRows() routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap.N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap.n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m    = mat->rmap.n;
        v    = a->v;
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for (k=1; k<size; k++) {
          v    = vv;  /* reset to the start of the receive buffer for each incoming message */
          m    = mat->rmap.range[k+1] - mat->rmap.range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for (j=0; j<N; j++) {
            for (i=0; i<m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v += m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (A == *matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
  }
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (*matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d",A->cmap.n,B->rmap.n);
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       0,
       MatCholeskyFactorSymbolic_MPIDense,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: when data is specified, this routine assumes the user has correctly
     allocated the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
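
/*
   A sketch of creating a matrix of this type directly (m, n, M, N are the
   usual local and global sizes chosen by the caller):

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/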

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                      MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                      MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                      MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/
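
/*
   With MATDENSE the sequential/parallel choice is deferred to the size of the
   communicator, so the same code works on one or many processes (a sketch; M
   and N are global sizes chosen by the caller):

       Mat A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
*/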

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
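
/*
   Sketch of supplying user-allocated storage (the array must hold at least
   m*N scalars stored by columns, where m is the local row count and N the
   global column count, matching the local block created in
   MatMPIDenseSetPreallocation_MPIDense() above):

       PetscScalar *data;
       ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);

   The caller retains ownership of data and frees it after the matrix has
   been destroyed.
*/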

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
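
/*
   A typical call (sketch): let PETSc pick the column layout and manage all
   memory for a matrix with m local rows and N global columns:

       Mat A;
       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DECIDE,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/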

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart     = A->rmap.rstart;
  mat->rmap.rend       = A->rmap.rend;
  a->size              = oldmat->size;
  a->rank              = oldmat->rank;
  mat->insertmode      = NOT_SET_VALUES;
  a->nvec              = oldmat->nvec;
  a->donotstash        = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
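
/*
   The routine above is reached through MatLoad() when the file stores the
   matrix in the dense binary layout written by MatView_MPIDense_Binary().
   A sketch of the caller side ("matrix.dat" is a placeholder file name):

       PetscViewer viewer;
       Mat         A;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/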
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatLoad_MPIDense"
1467 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1468 {
1469   Mat            A;
1470   PetscScalar    *vals,*svals;
1471   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1472   MPI_Status     status;
1473   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1474   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1475   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1476   PetscInt       i,nz,j,rstart,rend;
1477   int            fd;
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1482   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1483   if (!rank) {
1484     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1485     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1486     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object in file");
1487   }
1488 
1489   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1490   M = header[1]; N = header[2]; nz = header[3];
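  /* the binary header holds four integers: the cookie identifying a Mat, the
     global dimensions M and N, and the total number of nonzeros nz; a densely
     stored matrix is flagged by the sentinel value MATRIX_BINARY_FORMAT_DENSE
     in the nz slot */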
1491 
1492   /*
1493        Handle case where matrix is stored on disk as a dense matrix
1494   */
1495   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1496     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1497     PetscFunctionReturn(0);
1498   }
1499 
1500   /* determine ownership of all rows */
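  /* same block-row split as in MatLoad_MPIDense_DenseInFile() above;
     rowners[p]..rowners[p+1]-1 are the global rows of process p */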
1501   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1502   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1503   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1504   rowners[0] = 0;
1505   for (i=2; i<=size; i++) {
1506     rowners[i] += rowners[i-1];
1507   }
1508   rstart = rowners[rank];
1509   rend   = rowners[rank+1];
1510 
1511   /* distribute row lengths to all processors */
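  /* process 0 reads all M row lengths and scatters each piece with
     MPI_Scatterv(); rowners doubles as the displacement array because it
     already holds the prefix sums of the send counts */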
1512   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1513   offlens = ourlens + (rend-rstart);
1514   if (!rank) {
1515     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1516     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1517     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1518     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1519     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1520     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1521   } else {
1522     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1523   }
1524 
1525   if (!rank) {
1526     /* calculate the number of nonzeros on each processor */
1527     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1528     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1529     for (i=0; i<size; i++) {
1530       for (j=rowners[i]; j< rowners[i+1]; j++) {
1531         procsnz[i] += rowlengths[j];
1532       }
1533     }
1534     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1535 
1536     /* determine max buffer needed and allocate it */
1537     maxnz = 0;
1538     for (i=0; i<size; i++) {
1539       maxnz = PetscMax(maxnz,procsnz[i]);
1540     }
1541     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1542 
1543     /* read in my part of the matrix column indices  */
1544     nz = procsnz[0];
1545     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1546     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1547 
1548     /* read in everyone else's part and ship it off */
1549     for (i=1; i<size; i++) {
1550       nz   = procsnz[i];
1551       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1552       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1553     }
1554     ierr = PetscFree(cols);CHKERRQ(ierr);
1555   } else {
1556     /* determine buffer space needed for message */
1557     nz = 0;
1558     for (i=0; i<m; i++) {
1559       nz += ourlens[i];
1560     }
1561     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1562 
1563     /* receive the message of column indices */
1564     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1565     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1566     if (maxnz != nz) SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file: received %d column indices, expected %D",maxnz,nz);
1567   }
1568 
1569   /* loop over local rows, counting the off-diagonal entries in each */
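  /* offlens[i] counts the entries of local row i whose global column falls
     outside [rstart,rend), i.e. outside this process's diagonal block */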
1570   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1571   jj = 0;
1572   for (i=0; i<m; i++) {
1573     for (j=0; j<ourlens[i]; j++) {
1574       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1575       jj++;
1576     }
1577   }
1578 
1579   /* create our matrix */
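  /* ourlens is briefly reduced to the diagonal-block lengths, apparently
     mirroring the AIJ loader where those counts drive preallocation; the
     dense preallocation ignores them, and ourlens is restored immediately
     below so it again holds the full row lengths needed by MatSetValues() */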
1580   for (i=0; i<m; i++) {
1581     ourlens[i] -= offlens[i];
1582   }
1583   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1584   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1585   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1586   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1587   A = *newmat;
1588   for (i=0; i<m; i++) {
1589     ourlens[i] += offlens[i];
1590   }
1591 
1592   if (!rank) {
1593     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1594 
1595     /* read in my part of the matrix numerical values  */
1596     nz = procsnz[0];
1597     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1598 
1599     /* insert into matrix */
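    /* local row i is global row jj = rstart + i; it has ourlens[i] entries
       whose column indices start at smycols and values at svals, and both
       pointers advance by ourlens[i] from row to row */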
1600     jj      = rstart;
1601     smycols = mycols;
1602     svals   = vals;
1603     for (i=0; i<m; i++) {
1604       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1605       smycols += ourlens[i];
1606       svals   += ourlens[i];
1607       jj++;
1608     }
1609 
1610     /* read in the other processes' parts and ship them out */
1611     for (i=1; i<size; i++) {
1612       nz   = procsnz[i];
1613       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1614       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1615     }
1616     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1617   } else {
1618     /* receive numeric values */
1619     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1620 
1621     /* receive the message of values */
1622     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1623     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1624     if (maxnz != nz) SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file: received %d values, expected %D",maxnz,nz);
1625 
1626     /* insert into matrix */
1627     jj      = rstart;
1628     smycols = mycols;
1629     svals   = vals;
1630     for (i=0; i<m; i++) {
1631       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1632       smycols += ourlens[i];
1633       svals   += ourlens[i];
1634       jj++;
1635     }
1636   }
1637   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1638   ierr = PetscFree(vals);CHKERRQ(ierr);
1639   ierr = PetscFree(mycols);CHKERRQ(ierr);
1640   ierr = PetscFree(rowners);CHKERRQ(ierr);
1641 
1642   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatEqual_MPIDense"
1649 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1650 {
1651   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1652   Mat            a,b;
1653   PetscTruth     flg;
1654   PetscErrorCode ierr;
1655 
1656   PetscFunctionBegin;
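  /* compare the local dense blocks, then AND the results across the
     communicator so that every process returns the same answer */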
1657   a = matA->A;
1658   b = matB->A;
1659   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1660   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1661   PetscFunctionReturn(0);
1662 }
1663 