xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision fc4dec0ab267e831b41a38fdd32730b7f8abdc58)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
36               matrix that stores the local part of the operator. For sequential matrices it returns the matrix itself.
37 
38     Input Parameter:
39 .      A - the Seq or MPI dense matrix
40 
41     Output Parameter:
42 .      B - the inner matrix
43 
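    Example (a minimal sketch, added for illustration; A is assumed to be an
    already assembled dense matrix):
.vb
      Mat Aloc;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
.ve
    The returned matrix is a reference to the inner MATSEQDENSE object (or to A
    itself in the sequential case), not a copy, so it should not be destroyed.
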
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56     if (!flg) {  /* this type check is fragile: a MATDENSE matrix on a one-process communicator is really sequential */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
97 
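/*
   Usage sketch (added commentary, not part of the original source; declarations
   are elided): only locally owned rows may be queried, and each MatGetRow()
   must be paired with MatRestoreRow(), which frees the arrays allocated above.

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/
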
98 EXTERN_C_BEGIN
99 #undef __FUNCT__
100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102 {
103   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
106   PetscScalar    *array;
107   MPI_Comm       comm;
108 
109   PetscFunctionBegin;
110   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
111 
112   /* The reuse aspect is not implemented efficiently */
113   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
114 
115   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
116   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
117   ierr = MatCreate(comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
119   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
120   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
121   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
122   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 
125   *iscopy = PETSC_TRUE;
126   PetscFunctionReturn(0);
127 }
128 EXTERN_C_END
129 
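/*
   Added commentary: MatSetValues_MPIDense() below writes entries for locally
   owned rows directly into the local sequential block; entries destined for
   other processes are cached in mat->stash and routed to their owners during
   MatAssemblyBegin()/MatAssemblyEnd().  When roworiented is PETSC_FALSE the
   input array v is interpreted column-major, i.e. v[i+j*m] holds row i, column j.
*/
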
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetValues_MPIDense"
132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133 {
134   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
135   PetscErrorCode ierr;
136   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137   PetscTruth     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue;
142     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
147       } else {
148         for (j=0; j<n; j++) {
149           if (idxn[j] < 0) continue;
150           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
152         }
153       }
154     } else {
155       if (!A->donotstash) {
156         if (roworiented) {
157           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
158         } else {
159           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
160         }
161       }
162     }
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
178     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
183         if (idxn[j] >= mat->cmap.N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210 {
211   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
213   PetscErrorCode ierr;
214   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215   PetscScalar    *av,*bv,*v = lmat->v;
216   Mat            newmat;
217 
218   PetscFunctionBegin;
219   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
220   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
221   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
222   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
223 
224   /* No parallel redistribution is currently supported! We should really check each index
225      set to confirm that it is OK. Currently only submatrices with the same row
226      partitioning as the original matrix are supported! */
227 
228   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
230 
231   /* Check submatrix call */
232   if (scall == MAT_REUSE_MATRIX) {
233     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234     /* Really need to test rows and column sizes! */
235     newmat = *B;
236   } else {
237     /* Create and fill new matrix */
238     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
239     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
240     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
241     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
242   }
243 
244   /* Now extract the data pointers and do the copy, column at a time */
245   newmatd = (Mat_MPIDense*)newmat->data;
246   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247 
248   for (i=0; i<ncols; i++) {
249     av = v + nlrows*icol[i];
250     for (j=0; j<nrows; j++) {
251       *bv++ = av[irow[j] - rstart];
252     }
253   }
254 
255   /* Assemble the matrices so that the correct flags are set */
256   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258 
259   /* Free work space */
260   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
261   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
262   *B = newmat;
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279   MPI_Comm       comm = ((PetscObject)mat)->comm;
280   PetscErrorCode ierr;
281   PetscInt       nstash,reallocs;
282   InsertMode     addv;
283 
284   PetscFunctionBegin;
285   /* make sure all processors are either in INSERTMODE or ADDMODE */
286   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
287   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289   }
290   mat->insertmode = addv; /* in case this processor had no cache */
291 
292   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303   PetscErrorCode  ierr;
304   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305   PetscMPIInt     n;
306   PetscScalar     *val;
307   InsertMode      addv=mat->insertmode;
308 
309   PetscFunctionBegin;
310   /*  wait on receives */
311   while (1) {
312     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
313     if (!flg) break;
314 
315     for (i=0; i<n;) {
316       /* Now identify the consecutive vals belonging to the same row */
317       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318       if (j < n) ncols = j-i;
319       else       ncols = n-i;
320       /* Now assemble all these values with a single function call */
321       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
322       i = j;
323     }
324   }
325   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326 
327   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
332   }
333   PetscFunctionReturn(0);
334 }
335 
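/*
   Typical assembly sequence from the caller's side (an illustrative sketch,
   not part of the original source; declarations are elided).  Values may
   target any process's rows; the stash traffic above routes them to their
   owners:

      ierr = MatSetValues(A,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
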
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* the code does not handle the diagonal entries correctly unless the
349    matrix is square and the row and column ownerships are identical.
350    This is a BUG. The only way to fix it seems to be to access the
351    local matrix mdn->A directly rather than going through the MatZeroRows()
352    routine.
353 */
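
/*
   Communication scheme used below (added commentary): each process first
   determines the owner of every global row index it was handed; PetscMaxSum()
   then tells each process how many messages to expect and their maximum length,
   so the receives can be posted before the sends; finally the received indices
   are converted to local numbering and passed to MatZeroRows() on the local
   sequential block.
*/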
354 #undef __FUNCT__
355 #define __FUNCT__ "MatZeroRows_MPIDense"
356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357 {
358   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
359   PetscErrorCode ierr;
360   PetscInt       i,*owners = A->rmap.range;
361   PetscInt       *nprocs,j,idx,nsends;
362   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
364   PetscInt       *lens,*lrows,*values;
365   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366   MPI_Comm       comm = ((PetscObject)A)->comm;
367   MPI_Request    *send_waits,*recv_waits;
368   MPI_Status     recv_status,*send_status;
369   PetscTruth     found;
370 
371   PetscFunctionBegin;
372   /*  first count number of contributors to each processor */
373   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
374   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* N+1 so the allocation is nonzero even when N is zero */
376   for (i=0; i<N; i++) {
377     idx = rows[i];
378     found = PETSC_FALSE;
379     for (j=0; j<size; j++) {
380       if (idx >= owners[j] && idx < owners[j+1]) {
381         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382       }
383     }
384     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385   }
386   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
387 
388   /* inform other processors of number of messages and max length*/
389   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
390 
391   /* post receives:   */
392   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
394   for (i=0; i<nrecvs; i++) {
395     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
396   }
397 
398   /* do sends:
399       1) starts[i] gives the starting index in svalues for stuff going to
400          the ith processor
401   */
402   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
403   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
404   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
405   starts[0]  = 0;
406   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   for (i=0; i<N; i++) {
408     svalues[starts[owner[i]]++] = rows[i];
409   }
410 
411   starts[0] = 0;
412   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413   count = 0;
414   for (i=0; i<size; i++) {
415     if (nprocs[2*i+1]) {
416       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
417     }
418   }
419   ierr = PetscFree(starts);CHKERRQ(ierr);
420 
421   base = owners[rank];
422 
423   /*  wait on receives */
424   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
425   source = lens + nrecvs;
426   count  = nrecvs; slen = 0;
427   while (count) {
428     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
429     /* unpack receives into our local space */
430     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
431     source[imdex]  = recv_status.MPI_SOURCE;
432     lens[imdex]    = n;
433     slen += n;
434     count--;
435   }
436   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
437 
438   /* move the received row indices into the local row list */
439   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
440   count = 0;
441   for (i=0; i<nrecvs; i++) {
442     values = rvalues + i*nmax;
443     for (j=0; j<lens[i]; j++) {
444       lrows[count++] = values[j] - base;
445     }
446   }
447   ierr = PetscFree(rvalues);CHKERRQ(ierr);
448   ierr = PetscFree(lens);CHKERRQ(ierr);
449   ierr = PetscFree(owner);CHKERRQ(ierr);
450   ierr = PetscFree(nprocs);CHKERRQ(ierr);
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
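/*
   Added commentary: the multiply routines below gather the entire input vector
   into the local work vector lvec (each process stores all global columns of
   its row block), then apply the sequential dense kernel; the transpose
   variants run the same scatter in reverse, accumulating the partial products
   with ADD_VALUES.
*/
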
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap.n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
543   radd = A->rmap.rstart*m;
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 
558   PetscFunctionBegin;
559 
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
565   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
566   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
567   if (mdn->factor) {
568     ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
569     ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
570     ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
571     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(mdn);CHKERRQ(ierr);
574   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatView_MPIDense_Binary"
585 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
586 {
587   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
588   PetscErrorCode    ierr;
589   PetscViewerFormat format;
590   int               fd;
591   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
592   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
593   PetscScalar       *work,*v,*vv;
594   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
595   MPI_Status        status;
596 
597   PetscFunctionBegin;
598   if (mdn->size == 1) {
599     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
600   } else {
601     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
602     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
603     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
604 
605     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
606     if (format == PETSC_VIEWER_BINARY_NATIVE) {
607 
608       if (!rank) {
609         /* store the matrix as a dense matrix */
610         header[0] = MAT_FILE_COOKIE;
611         header[1] = mat->rmap.N;
612         header[2] = N;
613         header[3] = MATRIX_BINARY_FORMAT_DENSE;
614         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
615 
616         /* get largest work array needed for transposing array */
617         mmax = mat->rmap.n;
618         for (i=1; i<size; i++) {
619           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
620         }
621         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
622 
623         /* write out local array, by rows */
624         m    = mat->rmap.n;
625         v    = a->v;
626         for (j=0; j<N; j++) {
627           for (i=0; i<m; i++) {
628             work[j + i*N] = *v++;
629           }
630         }
631         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
632         /* get largest work array to receive messages from other processes, excludes process zero */
633         mmax = 0;
634         for (i=1; i<size; i++) {
635           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
636         }
637         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
638         for (k=1; k<size; k++) {
639           v    = vv; /* reset to the start of the receive buffer for each message, otherwise v walks past the end of vv */
640           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
641           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
642 
643           for (j=0; j<N; j++) {
644             for (i=0; i<m; i++) {
645               work[j + i*N] = *v++;
646             }
647           }
648           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
649         }
650         ierr = PetscFree(work);CHKERRQ(ierr);
651         ierr = PetscFree(vv);CHKERRQ(ierr);
652       } else {
653         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
654       }
655     }
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 #undef __FUNCT__
661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663 {
664   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665   PetscErrorCode    ierr;
666   PetscMPIInt       size = mdn->size,rank = mdn->rank;
667   PetscViewerType   vtype;
668   PetscTruth        iascii,isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
674   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
675   if (iascii) {
676     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
677     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
678     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679       MatInfo info;
680       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
681       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
682                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
683       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
684       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
685       PetscFunctionReturn(0);
686     } else if (format == PETSC_VIEWER_ASCII_INFO) {
687       PetscFunctionReturn(0);
688     }
689   } else if (isdraw) {
690     PetscDraw  draw;
691     PetscTruth isnull;
692 
693     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
694     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
695     if (isnull) PetscFunctionReturn(0);
696   }
697 
698   if (size == 1) {
699     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
700   } else {
701     /* assemble the entire matrix onto first processor. */
702     Mat         A;
703     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
704     PetscInt    *cols;
705     PetscScalar *vals;
706 
707     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
708     if (!rank) {
709       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
710     } else {
711       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
712     }
713     /* Since this is a temporary matrix, using MATMPIDENSE rather than ((PetscObject)mat)->type_name here is probably acceptable. */
714     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
715     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
716     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
717 
718     /* Copy the matrix ... This isn't the most efficient means,
719        but it's quick for now */
720     A->insertmode = INSERT_VALUES;
721     row = mat->rmap.rstart; m = mdn->A->rmap.n;
722     for (i=0; i<m; i++) {
723       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
724       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
725       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
726       row++;
727     }
728 
729     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
730     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
731     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     if (!rank) {
733       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
734     }
735     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
736     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
737     ierr = MatDestroy(A);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPIDense"
744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745 {
746   PetscErrorCode ierr;
747   PetscTruth     iascii,isbinary,isdraw,issocket;
748 
749   PetscFunctionBegin;
750 
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   } else {
761     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatGetInfo_MPIDense"
768 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
769 {
770   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
771   Mat            mdn = mat->A;
772   PetscErrorCode ierr;
773   PetscReal      isend[5],irecv[5];
774 
775   PetscFunctionBegin;
776   info->rows_global    = (double)A->rmap.N;
777   info->columns_global = (double)A->cmap.N;
778   info->rows_local     = (double)A->rmap.n;
779   info->columns_local  = (double)A->cmap.N;
780   info->block_size     = 1.0;
781   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
782   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
783   isend[3] = info->memory;  isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   } else if (flag == MAT_GLOBAL_SUM) {
798     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
799     info->nz_used      = irecv[0];
800     info->nz_allocated = irecv[1];
801     info->nz_unneeded  = irecv[2];
802     info->memory       = irecv[3];
803     info->mallocs      = irecv[4];
804   }
805   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
806   info->fill_ratio_needed = 0;
807   info->factor_mallocs    = 0;
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSetOption_MPIDense"
813 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
814 {
815   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
816   PetscErrorCode ierr;
817 
818   PetscFunctionBegin;
819   switch (op) {
820   case MAT_NEW_NONZERO_LOCATIONS:
821   case MAT_NEW_NONZERO_LOCATION_ERR:
822   case MAT_NEW_NONZERO_ALLOCATION_ERR:
823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
824     break;
825   case MAT_ROW_ORIENTED:
826     a->roworiented = flg;
827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
828     break;
829   case MAT_NEW_DIAGONALS:
830   case MAT_USE_HASH_TABLE:
831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
832     break;
833   case MAT_IGNORE_OFF_PROC_ENTRIES:
834     a->donotstash = flg;
835     break;
836   case MAT_SYMMETRIC:
837   case MAT_STRUCTURALLY_SYMMETRIC:
838   case MAT_HERMITIAN:
839   case MAT_SYMMETRY_ETERNAL:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
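/*
   Added commentary: MatDiagonalScale_MPIDense() computes A <- diag(ll) A diag(rr).
   The left scaling touches only locally owned rows; the right scaling first
   gathers the distributed vector rr into the work vector lvec, because each
   process stores every column of its row block.
*/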
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) { (*v++) *= x;}
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
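/*
   Added commentary: the Frobenius norm is the square root of the global sum of
   |a_ij|^2; the 1-norm is the maximum column sum max_j sum_i |a_ij|, formed by
   summing each process's local rows and combining the per-column sums with
   MPI_Allreduce(); the infinity norm (maximum row sum) reduces the local row
   norms with MPI_MAX, since each process owns whole rows.
*/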
890 #undef __FUNCT__
891 #define __FUNCT__ "MatNorm_MPIDense"
892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893 {
894   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
896   PetscErrorCode ierr;
897   PetscInt       i,j;
898   PetscReal      sum = 0.0;
899   PetscScalar    *v = mat->v;
900 
901   PetscFunctionBegin;
902   if (mdn->size == 1) {
903     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
904   } else {
905     if (type == NORM_FROBENIUS) {
906       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
907 #if defined(PETSC_USE_COMPLEX)
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909 #else
910         sum += (*v)*(*v); v++;
911 #endif
912       }
913       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
914       *nrm = sqrt(*nrm);
915       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
916     } else if (type == NORM_1) {
917       PetscReal *tmp,*tmp2;
918       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
919       tmp2 = tmp + A->cmap.N;
920       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
921       *nrm = 0.0;
922       v = mat->v;
923       for (j=0; j<mdn->A->cmap.n; j++) {
924         for (i=0; i<mdn->A->rmap.n; i++) {
925           tmp[j] += PetscAbsScalar(*v);  v++;
926         }
927       }
928       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       for (j=0; j<A->cmap.N; j++) {
930         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931       }
932       ierr = PetscFree(tmp);CHKERRQ(ierr);
933       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
934     } else if (type == NORM_INFINITY) { /* max row norm */
935       PetscReal ntemp;
936       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
937       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
938     } else {
939       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
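/*
   Added commentary: the local block of a parallel dense matrix is stored
   column-major and holds every global column of this process's rows, so column
   j of the local array is a contiguous run of m values.  The transpose below
   inserts that run as row j of the result at global columns rstart..rstart+m-1
   and lets the assembly pass route off-process values to their owners.
*/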
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_MPIDense"
947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948 {
949   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951   Mat            B;
952   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
953   PetscErrorCode ierr;
954   PetscInt       j,i;
955   PetscScalar    *v;
956 
957   PetscFunctionBegin;
958   if (A == *matout && M != N) {
959     SETERRQ(PETSC_ERR_SUP,"In-place transpose supports square matrices only");
960   }
961   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
962     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
963     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
964     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
965     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
966   } else {
967     B = *matout;
968   }
969 
970   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
971   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
972   for (i=0; i<m; i++) rwork[i] = rstart + i;
973   for (j=0; j<n; j++) {
974     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
975     v   += m;
976   }
977   ierr = PetscFree(rwork);CHKERRQ(ierr);
978   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980   if (*matout != A) {
981     *matout = B;
982   } else {
983     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
984   }
985   PetscFunctionReturn(0);
986 }
987 
988 #include "petscblaslapack.h"
989 #undef __FUNCT__
990 #define __FUNCT__ "MatScale_MPIDense"
991 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
992 {
993   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
994   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
995   PetscScalar    oalpha = alpha;
996   PetscErrorCode ierr;
997   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
998 
999   PetscFunctionBegin;
1000   BLASscal_(&nz,&oalpha,a->v,&one);
1001   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1009 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1010 {
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1020 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1021 {
1022   PetscErrorCode ierr;
1023   PetscInt       m=A->rmap.n,n=B->cmap.n;
1024   Mat            Cmat;
1025 
1026   PetscFunctionBegin;
1027   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1028   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1029   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1030   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1031   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1032   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1033   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1034   *C = Cmat;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 /* -------------------------------------------------------------------*/
1039 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1040        MatGetRow_MPIDense,
1041        MatRestoreRow_MPIDense,
1042        MatMult_MPIDense,
1043 /* 4*/ MatMultAdd_MPIDense,
1044        MatMultTranspose_MPIDense,
1045        MatMultTransposeAdd_MPIDense,
1046        0,
1047        0,
1048        0,
1049 /*10*/ 0,
1050        0,
1051        0,
1052        0,
1053        MatTranspose_MPIDense,
1054 /*15*/ MatGetInfo_MPIDense,
1055        MatEqual_MPIDense,
1056        MatGetDiagonal_MPIDense,
1057        MatDiagonalScale_MPIDense,
1058        MatNorm_MPIDense,
1059 /*20*/ MatAssemblyBegin_MPIDense,
1060        MatAssemblyEnd_MPIDense,
1061        0,
1062        MatSetOption_MPIDense,
1063        MatZeroEntries_MPIDense,
1064 /*25*/ MatZeroRows_MPIDense,
1065        MatLUFactorSymbolic_MPIDense,
1066        0,
1067        MatCholeskyFactorSymbolic_MPIDense,
1068        0,
1069 /*30*/ MatSetUpPreallocation_MPIDense,
1070        0,
1071        0,
1072        MatGetArray_MPIDense,
1073        MatRestoreArray_MPIDense,
1074 /*35*/ MatDuplicate_MPIDense,
1075        0,
1076        0,
1077        0,
1078        0,
1079 /*40*/ 0,
1080        MatGetSubMatrices_MPIDense,
1081        0,
1082        MatGetValues_MPIDense,
1083        0,
1084 /*45*/ 0,
1085        MatScale_MPIDense,
1086        0,
1087        0,
1088        0,
1089 /*50*/ 0,
1090        0,
1091        0,
1092        0,
1093        0,
1094 /*55*/ 0,
1095        0,
1096        0,
1097        0,
1098        0,
1099 /*60*/ MatGetSubMatrix_MPIDense,
1100        MatDestroy_MPIDense,
1101        MatView_MPIDense,
1102        0,
1103        0,
1104 /*65*/ 0,
1105        0,
1106        0,
1107        0,
1108        0,
1109 /*70*/ 0,
1110        0,
1111        0,
1112        0,
1113        0,
1114 /*75*/ 0,
1115        0,
1116        0,
1117        0,
1118        0,
1119 /*80*/ 0,
1120        0,
1121        0,
1122        0,
1123 /*84*/ MatLoad_MPIDense,
1124        0,
1125        0,
1126        0,
1127        0,
1128        0,
1129 /*90*/ 0,
1130        MatMatMultSymbolic_MPIDense_MPIDense,
1131        0,
1132        0,
1133        0,
1134 /*95*/ 0,
1135        0,
1136        0,
1137        0};
1138 
1139 EXTERN_C_BEGIN
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1142 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1143 {
1144   Mat_MPIDense   *a;
1145   PetscErrorCode ierr;
1146 
1147   PetscFunctionBegin;
1148   mat->preallocated = PETSC_TRUE;
1149   /* Note:  For now, when data is non-null, this assumes the user has correctly
1150    allocated the local dense storage space.  We should add error checking. */
1151 
1152   a    = (Mat_MPIDense*)mat->data;
1153   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1154   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1155   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1156   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1157   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 EXTERN_C_END
1161 
1162 /*MC
1163    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1164 
1165    Options Database Keys:
1166 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1167 
1168   Level: beginner
1169 
1170 .seealso: MatCreateMPIDense
1171 M*/
1172 
1173 EXTERN_C_BEGIN
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatCreate_MPIDense"
1176 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1177 {
1178   Mat_MPIDense   *a;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1183   mat->data         = (void*)a;
1184   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1185   mat->factor       = 0;
1186   mat->mapping      = 0;
1187 
1188   a->factor       = 0;
1189   mat->insertmode = NOT_SET_VALUES;
1190   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1191   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1192 
1193   mat->rmap.bs = mat->cmap.bs = 1;
1194   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1195   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1196   a->nvec = mat->cmap.n;
1197 
1198   /* build cache for off array entries formed */
1199   a->donotstash = PETSC_FALSE;
1200   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1201 
1202   /* stuff used for matrix vector multiply */
1203   a->lvec        = 0;
1204   a->Mvctx       = 0;
1205   a->roworiented = PETSC_TRUE;
1206 
1207   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1208                                      "MatGetDiagonalBlock_MPIDense",
1209                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1210   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1211                                      "MatMPIDenseSetPreallocation_MPIDense",
1212                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1214                                      "MatMatMult_MPIAIJ_MPIDense",
1215                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1217                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1218                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1220                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1221                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1222   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 EXTERN_C_END
1226 
1227 /*MC
1228    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1229 
1230    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1231    and MATMPIDENSE otherwise.
1232 
1233    Options Database Keys:
1234 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1235 
1236   Level: beginner
1237 
1238 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1239 M*/
1240 
1241 EXTERN_C_BEGIN
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatCreate_Dense"
1244 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1245 {
1246   PetscErrorCode ierr;
1247   PetscMPIInt    size;
1248 
1249   PetscFunctionBegin;
1250   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1251   if (size == 1) {
1252     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1253   } else {
1254     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 EXTERN_C_END
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1262 /*@C
1263    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1264 
1265    Not collective
1266 
1267    Input Parameters:
1268 .  A - the matrix
1269 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1270    to control all matrix memory allocation.
1271 
1272    Notes:
1273    The dense format is fully compatible with standard Fortran 77
1274    storage by columns.
1275 
1276    The data input variable is intended primarily for Fortran programmers
1277    who wish to allocate their own matrix memory space.  Most users should
1278    set data=PETSC_NULL.
1279 
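   Example (a minimal sketch, added for illustration; the global size 100 is
   arbitrary):
.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
.ve
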
1280    Level: intermediate
1281 
1282 .keywords: matrix,dense, parallel
1283 
1284 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1285 @*/
1286 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1287 {
1288   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1289 
1290   PetscFunctionBegin;
1291   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1292   if (f) {
1293     ierr = (*f)(mat,data);CHKERRQ(ierr);
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatCreateMPIDense"
1300 /*@C
1301    MatCreateMPIDense - Creates a parallel matrix in dense format.
1302 
1303    Collective on MPI_Comm
1304 
1305    Input Parameters:
1306 +  comm - MPI communicator
1307 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1308 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1309 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1310 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1311 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1312    to control all matrix memory allocation.
1313 
1314    Output Parameter:
1315 .  A - the matrix
1316 
1317    Notes:
1318    The dense format is fully compatible with standard Fortran 77
1319    storage by columns.
1320 
1321    The data input variable is intended primarily for Fortran programmers
1322    who wish to allocate their own matrix memory space.  Most users should
1323    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1324 
1325    The user MUST specify either the local or global matrix dimensions
1326    (possibly both).
1327 
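   Example (a minimal sketch, added for illustration; the global size 100 is
   arbitrary):
.vb
      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
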
1328    Level: intermediate
1329 
1330 .keywords: matrix,dense, parallel
1331 
1332 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1333 @*/
1334 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1335 {
1336   PetscErrorCode ierr;
1337   PetscMPIInt    size;
1338 
1339   PetscFunctionBegin;
1340   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1341   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1342   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1343   if (size > 1) {
1344     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1345     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1346   } else {
1347     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1348     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatDuplicate_MPIDense"
1355 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1356 {
1357   Mat            mat;
1358   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   *newmat       = 0;
1363   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1364   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1365   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1366   a                 = (Mat_MPIDense*)mat->data;
1367   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1368   mat->factor       = A->factor;
1369   mat->assembled    = PETSC_TRUE;
1370   mat->preallocated = PETSC_TRUE;
1371 
1372   mat->rmap.rstart     = A->rmap.rstart;
1373   mat->rmap.rend       = A->rmap.rend;
1374   a->size              = oldmat->size;
1375   a->rank              = oldmat->rank;
1376   mat->insertmode      = NOT_SET_VALUES;
1377   a->nvec              = oldmat->nvec;
1378   a->donotstash        = oldmat->donotstash;
1379 
1380   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1381   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1382   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1383 
1384   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1385   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1386   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1387   *newmat = mat;
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 #include "petscsys.h"
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1395 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1396 {
1397   PetscErrorCode ierr;
1398   PetscMPIInt    rank,size;
1399   PetscInt       *rowners,i,m,nz,j;
1400   PetscScalar    *array,*vals,*vals_ptr;
1401   MPI_Status     status;
1402 
1403   PetscFunctionBegin;
1404   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1405   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1406 
1407   /* determine ownership of all rows */
1408   m          = M/size + ((M % size) > rank);
1409   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1410   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1411   rowners[0] = 0;
1412   for (i=2; i<=size; i++) {
1413     rowners[i] += rowners[i-1];
1414   }
1415 
1416   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1417   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1418   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1419   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1420   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1421 
1422   if (!rank) {
1423     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1424 
1425     /* read in my part of the matrix numerical values  */
1426     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1427 
1428     /* insert into the matrix row by row (the file is stored row-major while the array is column-major, so we cannot read directly into array) */
1429     vals_ptr = vals;
1430     for (i=0; i<m; i++) {
1431       for (j=0; j<N; j++) {
1432         array[i + j*m] = *vals_ptr++;
1433       }
1434     }
1435 
1436     /* read in the other processors' parts and ship them out */
1437     for (i=1; i<size; i++) {
1438       nz   = (rowners[i+1] - rowners[i])*N;
1439       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1440       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1441     }
1442   } else {
1443     /* receive numeric values */
1444     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1445 
1446     /* receive message of values*/
1447     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1448 
1449     /* insert into the matrix row by row (the file is stored row-major while the array is column-major, so we cannot read directly into array) */
1450     vals_ptr = vals;
1451     for (i=0; i<m; i++) {
1452       for (j=0; j<N; j++) {
1453         array[i + j*m] = *vals_ptr++;
1454       }
1455     }
1456   }
1457   ierr = PetscFree(rowners);CHKERRQ(ierr);
1458   ierr = PetscFree(vals);CHKERRQ(ierr);
1459   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1460   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatLoad_MPIDense"
1466 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1467 {
1468   Mat            A;
1469   PetscScalar    *vals,*svals;
1470   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1471   MPI_Status     status;
1472   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1473   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1474   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1475   PetscInt       i,nz,j,rstart,rend;
1476   int            fd;
1477   PetscErrorCode ierr;
1478 
1479   PetscFunctionBegin;
1480   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1481   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix;
       note that fd was set (and is used) only on process 0
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }
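  /* Note: the subtract/restore of offlens around the creation above mirrors
     the sparse (AIJ) loader, where the split lengths drive preallocation;
     MatMPIDenseSetPreallocation(...,PETSC_NULL) does not use these lengths,
     so for the dense case this appears to be vestigial. */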

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processes' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
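
/*
   Usage sketch (not part of this file): read a matrix previously written
   with MatView() into a parallel dense matrix. "matrix.dat" is a
   placeholder file name; MatLoad() dispatches to MatLoad_MPIDense() above
   for this type.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/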

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* combine the local answers so every process returns the same result;
     PetscTruth is int-valued, hence MPI_INT with a logical AND */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
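
/*
   Usage sketch: MatEqual() is collective over the matrices' communicator,
   so every process must call it, and every process gets the same answer.

     PetscTruth eq;
     ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
*/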