xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4e0d8c253c5baa1c48327ddb2b78bf7a43f58497)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
36               matrix that stores this process's portion of the operator. For sequential matrices it returns the matrix itself.
37 
38     Input Parameter:
39 .      A - the Seq or MPI dense matrix
40 
41     Output Parameter:
42 .      B - the inner matrix
43 
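    Example Usage:
    A minimal sketch (the names A and Aloc are illustrative; assumes A was already created as a
    dense matrix, and error checking with CHKERRQ() is omitted for brevity):
.vb
      Mat Aloc;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);
      /* Aloc is the sequential matrix holding this process's rows of A */
.ve
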
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56   if (!flg) {  /* MATDENSE on a single process is really MATSEQDENSE, so also check the communicator size */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
97 
98 EXTERN_C_BEGIN
99 #undef __FUNCT__
100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102 {
103   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
106   PetscScalar    *array;
107   MPI_Comm       comm;
108 
109   PetscFunctionBegin;
110   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
111 
112   /* The reuse aspect is not implemented efficiently */
113   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
114 
115   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
116   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
117   ierr = MatCreate(comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
119   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
120   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
121   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
122   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 
125   *iscopy = PETSC_TRUE;
126   PetscFunctionReturn(0);
127 }
128 EXTERN_C_END
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetValues_MPIDense"
132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133 {
134   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
135   PetscErrorCode ierr;
136   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137   PetscTruth     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue;
142     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
147       } else {
148         for (j=0; j<n; j++) {
149           if (idxn[j] < 0) continue;
150           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
152         }
153       }
154     } else {
155       if (!A->donotstash) {
156         if (roworiented) {
157           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
158         } else {
159           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
160         }
161       }
162     }
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
178     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
183         if (idxn[j] >= mat->cmap.N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210 {
211   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
213   PetscErrorCode ierr;
214   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215   PetscScalar    *av,*bv,*v = lmat->v;
216   Mat            newmat;
217 
218   PetscFunctionBegin;
219   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
220   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
221   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
222   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
223 
224   /* No parallel redistribution is currently supported! We should really check each index set
225      to confirm that it is OK; currently the submatrix must use the same row partitioning as the
226      original matrix. */
227 
228   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
230 
231   /* Check submatrix call */
232   if (scall == MAT_REUSE_MATRIX) {
233     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234     /* Really need to test rows and column sizes! */
235     newmat = *B;
236   } else {
237     /* Create and fill new matrix */
238     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
239     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
240     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
241     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
242   }
243 
244   /* Now extract the data pointers and do the copy, column at a time */
245   newmatd = (Mat_MPIDense*)newmat->data;
246   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247 
248   for (i=0; i<ncols; i++) {
249     av = v + nlrows*icol[i];
250     for (j=0; j<nrows; j++) {
251       *bv++ = av[irow[j] - rstart];
252     }
253   }
254 
255   /* Assemble the matrices so that the correct flags are set */
256   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258 
259   /* Free work space */
260   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
261   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
262   *B = newmat;
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279   MPI_Comm       comm = mat->comm;
280   PetscErrorCode ierr;
281   PetscInt       nstash,reallocs;
282   InsertMode     addv;
283 
284   PetscFunctionBegin;
285   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
286   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
287   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289   }
290   mat->insertmode = addv; /* in case this processor had no cache */
291 
292   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303   PetscErrorCode  ierr;
304   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305   PetscMPIInt     n;
306   PetscScalar     *val;
307   InsertMode      addv=mat->insertmode;
308 
309   PetscFunctionBegin;
310   /*  wait on receives */
311   while (1) {
312     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
313     if (!flg) break;
314 
315     for (i=0; i<n;) {
316       /* Now identify the consecutive vals belonging to the same row */
317       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318       if (j < n) ncols = j-i;
319       else       ncols = n-i;
320       /* Now assemble all these values with a single function call */
321       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
322       i = j;
323     }
324   }
325   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326 
327   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* the code does not do the diagonal entries correctly unless the
349    matrix is square and the column and row ownerships are identical.
350    This is a BUG. The only way to fix it seems to be to access
351    mdn->A directly rather than going through the MatZeroRows()
352    routine.
353 */
354 #undef __FUNCT__
355 #define __FUNCT__ "MatZeroRows_MPIDense"
356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357 {
358   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
359   PetscErrorCode ierr;
360   PetscInt       i,*owners = A->rmap.range;
361   PetscInt       *nprocs,j,idx,nsends;
362   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
364   PetscInt       *lens,*lrows,*values;
365   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366   MPI_Comm       comm = A->comm;
367   MPI_Request    *send_waits,*recv_waits;
368   MPI_Status     recv_status,*send_status;
369   PetscTruth     found;
370 
371   PetscFunctionBegin;
372   /*  first count number of contributors to each processor */
373   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
374   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
376   for (i=0; i<N; i++) {
377     idx = rows[i];
378     found = PETSC_FALSE;
379     for (j=0; j<size; j++) {
380       if (idx >= owners[j] && idx < owners[j+1]) {
381         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382       }
383     }
384     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385   }
386   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
387 
388   /* inform other processors of number of messages and max length*/
389   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
390 
391   /* post receives:   */
392   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
394   for (i=0; i<nrecvs; i++) {
395     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
396   }
397 
398   /* do sends:
399       1) starts[i] gives the starting index in svalues for stuff going to
400          the ith processor
401   */
402   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
403   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
404   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
405   starts[0]  = 0;
406   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   for (i=0; i<N; i++) {
408     svalues[starts[owner[i]]++] = rows[i];
409   }
410 
411   starts[0] = 0;
412   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413   count = 0;
414   for (i=0; i<size; i++) {
415     if (nprocs[2*i+1]) {
416       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
417     }
418   }
419   ierr = PetscFree(starts);CHKERRQ(ierr);
420 
421   base = owners[rank];
422 
423   /*  wait on receives */
424   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
425   source = lens + nrecvs;
426   count  = nrecvs; slen = 0;
427   while (count) {
428     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
429     /* unpack receives into our local space */
430     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
431     source[imdex]  = recv_status.MPI_SOURCE;
432     lens[imdex]    = n;
433     slen += n;
434     count--;
435   }
436   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
437 
438   /* move the data into the send scatter */
439   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
440   count = 0;
441   for (i=0; i<nrecvs; i++) {
442     values = rvalues + i*nmax;
443     for (j=0; j<lens[i]; j++) {
444       lrows[count++] = values[j] - base;
445     }
446   }
447   ierr = PetscFree(rvalues);CHKERRQ(ierr);
448   ierr = PetscFree(lens);CHKERRQ(ierr);
449   ierr = PetscFree(owner);CHKERRQ(ierr);
450   ierr = PetscFree(nprocs);CHKERRQ(ierr);
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap.n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
543   radd = A->rmap.rstart*m;
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 
558   PetscFunctionBegin;
559 
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
565   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
566   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
567   if (mdn->factor) {
568     ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
569     ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
570     ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
571     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(mdn);CHKERRQ(ierr);
574   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatView_MPIDense_Binary"
585 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
586 {
587   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
588   PetscErrorCode    ierr;
589   PetscViewerFormat format;
590   int               fd;
591   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
592   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
593   PetscScalar       *work,*v;
594   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
595   MPI_Status        status;
596 
597   PetscFunctionBegin;
598   if (mdn->size == 1) {
599     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
600   } else {
601     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
602     ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
603     ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
604 
605     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
606     if (format == PETSC_VIEWER_BINARY_NATIVE) {
607 
608       if (!rank) {
609 	/* store the matrix as a dense matrix */
610 	header[0] = MAT_FILE_COOKIE;
611 	header[1] = mat->rmap.N;
612 	header[2] = N;
613 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
614 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
615 
616 	/* get largest work array needed for transposing array */
617         mmax = mat->rmap.n;
618         for (i=1; i<size; i++) {
619           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
620         }
621 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
622 
623 	/* write out local array, by rows */
624         m    = mat->rmap.n;
625 	v    = a->v;
626         for (j=0; j<N; j++) {
627 	  for (i=0; i<m; i++) {
628 	    work[j + i*N] = *v++;
629 	  }
630 	}
631 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
632 
633         /* get largest work array to receive messages from other processes, excludes process zero */
634         mmax = 0;
635         for (i=1; i<size; i++) {
636           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
637         }
638 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&v);CHKERRQ(ierr);
639         for (k=1; k<size; k++) {
640           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
641           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,mat->comm,&status);CHKERRQ(ierr);
642 
643 	  for (j=0; j<N; j++) {
644 	    for (i=0; i<m; i++) {
645 	      work[j + i*N] = v[i + j*m]; /* receive buffer is column-major with m rows; do not advance v */
646 	    }
647 	  }
648 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
649         }
650         ierr = PetscFree(work);CHKERRQ(ierr);
651         ierr = PetscFree(v);CHKERRQ(ierr);
652       } else {
653         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
654       }
655     }
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 #undef __FUNCT__
661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663 {
664   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665   PetscErrorCode    ierr;
666   PetscMPIInt       size = mdn->size,rank = mdn->rank;
667   PetscViewerType   vtype;
668   PetscTruth        iascii,isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
674   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
675   if (iascii) {
676     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
677     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
678     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679       MatInfo info;
680       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
681       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
682                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
683       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
684       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
685       PetscFunctionReturn(0);
686     } else if (format == PETSC_VIEWER_ASCII_INFO) {
687       PetscFunctionReturn(0);
688     }
689   } else if (isdraw) {
690     PetscDraw  draw;
691     PetscTruth isnull;
692 
693     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
694     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
695     if (isnull) PetscFunctionReturn(0);
696   }
697 
698   if (size == 1) {
699     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
700   } else {
701     /* assemble the entire matrix onto first processor. */
702     Mat         A;
703     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
704     PetscInt    *cols;
705     PetscScalar *vals;
706 
707     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
708     if (!rank) {
709       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
710     } else {
711       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
712     }
713     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
714     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
715     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
716     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
717 
718     /* Copy the matrix ... This isn't the most efficient means,
719        but it's quick for now */
720     A->insertmode = INSERT_VALUES;
721     row = mat->rmap.rstart; m = mdn->A->rmap.n;
722     for (i=0; i<m; i++) {
723       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
724       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
725       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
726       row++;
727     }
728 
729     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
730     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
731     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     if (!rank) {
733       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
734     }
735     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
736     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
737     ierr = MatDestroy(A);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPIDense"
744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745 {
746   PetscErrorCode ierr;
747   PetscTruth     iascii,isbinary,isdraw,issocket;
748 
749   PetscFunctionBegin;
750 
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   } else {
761     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatGetInfo_MPIDense"
768 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
769 {
770   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
771   Mat            mdn = mat->A;
772   PetscErrorCode ierr;
773   PetscReal      isend[5],irecv[5];
774 
775   PetscFunctionBegin;
776   info->rows_global    = (double)A->rmap.N;
777   info->columns_global = (double)A->cmap.N;
778   info->rows_local     = (double)A->rmap.n;
779   info->columns_local  = (double)A->cmap.N;
780   info->block_size     = 1.0;
781   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
782   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
783   isend[3] = info->memory;  isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   } else if (flag == MAT_GLOBAL_SUM) {
798     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
799     info->nz_used      = irecv[0];
800     info->nz_allocated = irecv[1];
801     info->nz_unneeded  = irecv[2];
802     info->memory       = irecv[3];
803     info->mallocs      = irecv[4];
804   }
805   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
806   info->fill_ratio_needed = 0;
807   info->factor_mallocs    = 0;
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSetOption_MPIDense"
813 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
814 {
815   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
816   PetscErrorCode ierr;
817 
818   PetscFunctionBegin;
819   switch (op) {
820   case MAT_NO_NEW_NONZERO_LOCATIONS:
821   case MAT_NEW_NONZERO_LOCATION_ERR:
822   case MAT_NEW_NONZERO_ALLOCATION_ERR:
823   case MAT_COLUMNS_SORTED:
824     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
825     break;
826   case MAT_ROW_ORIENTED:
827     a->roworiented = flg;
828     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
829     break;
830   case MAT_ROWS_SORTED:
831   case MAT_NEW_DIAGONALS:
832   case MAT_USE_HASH_TABLE:
833     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
834     break;
835   case MAT_IGNORE_OFF_PROC_ENTRIES:
836     a->donotstash = flg;
837     break;
838   case MAT_SYMMETRIC:
839   case MAT_STRUCTURALLY_SYMMETRIC:
840   case MAT_HERMITIAN:
841   case MAT_SYMMETRY_ETERNAL:
842     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
843     break;
844   default:
845     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatDiagonalScale_MPIDense"
853 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
854 {
855   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
856   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
857   PetscScalar    *l,*r,x,*v;
858   PetscErrorCode ierr;
859   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
860 
861   PetscFunctionBegin;
862   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
863   if (ll) {
864     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
865     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
866     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
867     for (i=0; i<m; i++) {
868       x = l[i];
869       v = mat->v + i;
870       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
871     }
872     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
873     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
874   }
875   if (rr) {
876     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
877     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
878     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
881     for (i=0; i<n; i++) {
882       x = r[i];
883       v = mat->v + i*m;
884       for (j=0; j<m; j++) { (*v++) *= x;}
885     }
886     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
887     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
888   }
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatNorm_MPIDense"
894 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
895 {
896   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
897   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
898   PetscErrorCode ierr;
899   PetscInt       i,j;
900   PetscReal      sum = 0.0;
901   PetscScalar    *v = mat->v;
902 
903   PetscFunctionBegin;
904   if (mdn->size == 1) {
905     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
906   } else {
907     if (type == NORM_FROBENIUS) {
908       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
909 #if defined(PETSC_USE_COMPLEX)
910         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
911 #else
912         sum += (*v)*(*v); v++;
913 #endif
914       }
915       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
916       *nrm = sqrt(*nrm);
917       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
918     } else if (type == NORM_1) {
919       PetscReal *tmp,*tmp2;
920       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
921       tmp2 = tmp + A->cmap.N;
922       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
923       *nrm = 0.0;
924       v = mat->v;
925       for (j=0; j<mdn->A->cmap.n; j++) {
926         for (i=0; i<mdn->A->rmap.n; i++) {
927           tmp[j] += PetscAbsScalar(*v);  v++;
928         }
929       }
930       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
931       for (j=0; j<A->cmap.N; j++) {
932         if (tmp2[j] > *nrm) *nrm = tmp2[j];
933       }
934       ierr = PetscFree(tmp);CHKERRQ(ierr);
935       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
936     } else if (type == NORM_INFINITY) { /* max row norm */
937       PetscReal ntemp;
938       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
939       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
940     } else {
941       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
942     }
943   }
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatTranspose_MPIDense"
949 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
950 {
951   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
952   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
953   Mat            B;
954   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
955   PetscErrorCode ierr;
956   PetscInt       j,i;
957   PetscScalar    *v;
958 
959   PetscFunctionBegin;
960   if (!matout && M != N) {
961     SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
962   }
963   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
964   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
965   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
966   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
967 
968   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
969   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
970   for (i=0; i<m; i++) rwork[i] = rstart + i;
971   for (j=0; j<n; j++) {
972     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
973     v   += m;
974   }
975   ierr = PetscFree(rwork);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978   if (matout) {
979     *matout = B;
980   } else {
981     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 #include "petscblaslapack.h"
987 #undef __FUNCT__
988 #define __FUNCT__ "MatScale_MPIDense"
989 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990 {
991   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
992   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
993   PetscScalar    oalpha = alpha;
994   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
995   PetscErrorCode ierr;
996 
997   PetscFunctionBegin;
998   BLASscal_(&nz,&oalpha,a->v,&one);
999   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1007 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008 {
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1018 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1019 {
1020   PetscErrorCode ierr;
1021   PetscInt       m=A->rmap.n,n=B->cmap.n;
1022   Mat            Cmat;
1023 
1024   PetscFunctionBegin;
1025   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d",A->cmap.n,B->rmap.n);
1026   ierr = MatCreate(B->comm,&Cmat);CHKERRQ(ierr);
1027   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1028   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1029   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1030   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1031   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032   *C = Cmat;
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 /* -------------------------------------------------------------------*/
1037 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1038        MatGetRow_MPIDense,
1039        MatRestoreRow_MPIDense,
1040        MatMult_MPIDense,
1041 /* 4*/ MatMultAdd_MPIDense,
1042        MatMultTranspose_MPIDense,
1043        MatMultTransposeAdd_MPIDense,
1044        0,
1045        0,
1046        0,
1047 /*10*/ 0,
1048        0,
1049        0,
1050        0,
1051        MatTranspose_MPIDense,
1052 /*15*/ MatGetInfo_MPIDense,
1053        MatEqual_MPIDense,
1054        MatGetDiagonal_MPIDense,
1055        MatDiagonalScale_MPIDense,
1056        MatNorm_MPIDense,
1057 /*20*/ MatAssemblyBegin_MPIDense,
1058        MatAssemblyEnd_MPIDense,
1059        0,
1060        MatSetOption_MPIDense,
1061        MatZeroEntries_MPIDense,
1062 /*25*/ MatZeroRows_MPIDense,
1063        MatLUFactorSymbolic_MPIDense,
1064        0,
1065        MatCholeskyFactorSymbolic_MPIDense,
1066        0,
1067 /*30*/ MatSetUpPreallocation_MPIDense,
1068        0,
1069        0,
1070        MatGetArray_MPIDense,
1071        MatRestoreArray_MPIDense,
1072 /*35*/ MatDuplicate_MPIDense,
1073        0,
1074        0,
1075        0,
1076        0,
1077 /*40*/ 0,
1078        MatGetSubMatrices_MPIDense,
1079        0,
1080        MatGetValues_MPIDense,
1081        0,
1082 /*45*/ 0,
1083        MatScale_MPIDense,
1084        0,
1085        0,
1086        0,
1087 /*50*/ 0,
1088        0,
1089        0,
1090        0,
1091        0,
1092 /*55*/ 0,
1093        0,
1094        0,
1095        0,
1096        0,
1097 /*60*/ MatGetSubMatrix_MPIDense,
1098        MatDestroy_MPIDense,
1099        MatView_MPIDense,
1100        0,
1101        0,
1102 /*65*/ 0,
1103        0,
1104        0,
1105        0,
1106        0,
1107 /*70*/ 0,
1108        0,
1109        0,
1110        0,
1111        0,
1112 /*75*/ 0,
1113        0,
1114        0,
1115        0,
1116        0,
1117 /*80*/ 0,
1118        0,
1119        0,
1120        0,
1121 /*84*/ MatLoad_MPIDense,
1122        0,
1123        0,
1124        0,
1125        0,
1126        0,
1127 /*90*/ 0,
1128        MatMatMultSymbolic_MPIDense_MPIDense,
1129        0,
1130        0,
1131        0,
1132 /*95*/ 0,
1133        0,
1134        0,
1135        0};
1136 
1137 EXTERN_C_BEGIN
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1140 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1141 {
1142   Mat_MPIDense   *a;
1143   PetscErrorCode ierr;
1144 
1145   PetscFunctionBegin;
1146   mat->preallocated = PETSC_TRUE;
1147   /* Note:  For now, when data is specified, we assume the user has correctly
1148    allocated the local dense storage space; error checking should be added. */
1149 
1150   a    = (Mat_MPIDense*)mat->data;
1151   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1152   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1153   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1154   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1155   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 EXTERN_C_END
1159 
1160 /*MC
1161    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1162 
1163    Options Database Keys:
1164 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1165 
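   Example Usage:
   A minimal sketch of creating an M-by-N mpidense matrix (comm, M, and N are assumed to be
   defined by the caller; error checking with CHKERRQ() omitted):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   ierr = MatSetType(A,MATMPIDENSE);
   ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);  /* let PETSc allocate the storage */
.ve
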
1166   Level: beginner
1167 
1168 .seealso: MatCreateMPIDense
1169 M*/
1170 
1171 EXTERN_C_BEGIN
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatCreate_MPIDense"
1174 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1175 {
1176   Mat_MPIDense   *a;
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1181   mat->data         = (void*)a;
1182   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1183   mat->factor       = 0;
1184   mat->mapping      = 0;
1185 
1186   a->factor       = 0;
1187   mat->insertmode = NOT_SET_VALUES;
1188   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1189   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1190 
1191   mat->rmap.bs = mat->cmap.bs = 1;
1192   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1193   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1194   a->nvec = mat->cmap.n;
1195 
1196   /* build cache for off array entries formed */
1197   a->donotstash = PETSC_FALSE;
1198   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1199 
1200   /* stuff used for matrix vector multiply */
1201   a->lvec        = 0;
1202   a->Mvctx       = 0;
1203   a->roworiented = PETSC_TRUE;
1204 
1205   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1206                                      "MatGetDiagonalBlock_MPIDense",
1207                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1208   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1209                                      "MatMPIDenseSetPreallocation_MPIDense",
1210                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1211   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1212                                      "MatMatMult_MPIAIJ_MPIDense",
1213                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1215                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1216                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1218                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1219                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1220   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1221   PetscFunctionReturn(0);
1222 }
1223 EXTERN_C_END
1224 
1225 /*MC
1226    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1227 
1228    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1229    and MATMPIDENSE otherwise.
1230 
1231    Options Database Keys:
1232 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1233 
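   Example Usage:
   A minimal sketch (comm, M, and N assumed defined by the caller; error checking omitted); the
   same code yields a MATSEQDENSE matrix on one process and a MATMPIDENSE matrix on several:
.vb
   Mat A;
   ierr = MatCreate(comm,&A);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   ierr = MatSetType(A,MATDENSE);
.ve
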
1234   Level: beginner
1235 
1236 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1237 M*/
1238 
1239 EXTERN_C_BEGIN
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatCreate_Dense"
1242 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1243 {
1244   PetscErrorCode ierr;
1245   PetscMPIInt    size;
1246 
1247   PetscFunctionBegin;
1248   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1249   if (size == 1) {
1250     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1251   } else {
1252     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 EXTERN_C_END
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1260 /*@C
1261    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1262 
1263    Not collective
1264 
1265    Input Parameters:
1266 +  A - the matrix
1267 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1268    to control all matrix memory allocation.
1269 
1270    Notes:
1271    The dense format is fully compatible with standard Fortran 77
1272    storage by columns.
1273 
1274    The data input variable is intended primarily for Fortran programmers
1275    who wish to allocate their own matrix memory space.  Most users should
1276    set data=PETSC_NULL.
1277 
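   Example Usage:
   A sketch of supplying user-allocated storage (comm, m, and N are assumed defined by the caller;
   the local block created by this routine is (local rows) by (global columns), stored by columns;
   error checking omitted):
.vb
   Mat          A;
   PetscScalar  *data;
   ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);
   ierr = MatCreate(comm,&A);
   ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);
   ierr = MatSetType(A,MATMPIDENSE);
   ierr = MatMPIDenseSetPreallocation(A,data);
.ve
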
1278    Level: intermediate
1279 
1280 .keywords: matrix,dense, parallel
1281 
1282 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1283 @*/
1284 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1285 {
1286   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1287 
1288   PetscFunctionBegin;
1289   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1290   if (f) {
1291     ierr = (*f)(mat,data);CHKERRQ(ierr);
1292   }
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatCreateMPIDense"
1298 /*@C
1299    MatCreateMPIDense - Creates a parallel matrix in dense format.
1300 
1301    Collective on MPI_Comm
1302 
1303    Input Parameters:
1304 +  comm - MPI communicator
1305 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1306 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1307 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1308 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1309 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1310    to control all matrix memory allocation.
1311 
1312    Output Parameter:
1313 .  A - the matrix
1314 
1315    Notes:
1316    The dense format is fully compatible with standard Fortran 77
1317    storage by columns.
1318 
1319    The data input variable is intended primarily for Fortran programmers
1320    who wish to allocate their own matrix memory space.  Most users should
1321    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1322 
1323    The user MUST specify either the local or global matrix dimensions
1324    (possibly both).
1325 
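   Example Usage:
   A minimal sketch in which PETSc chooses the local sizes and allocates the storage
   (M and N assumed defined by the caller; error checking with CHKERRQ() omitted):
.vb
   Mat A;
   ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
   /* ... set entries with MatSetValues() ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
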
1326    Level: intermediate
1327 
1328 .keywords: matrix,dense, parallel
1329 
1330 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1331 @*/
1332 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1333 {
1334   PetscErrorCode ierr;
1335   PetscMPIInt    size;
1336 
1337   PetscFunctionBegin;
1338   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1339   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1340   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1341   if (size > 1) {
1342     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1343     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1344   } else {
1345     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1346     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1347   }
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatDuplicate_MPIDense"
1353 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1354 {
1355   Mat            mat;
1356   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   *newmat       = 0;
1361   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1362   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1363   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1364   a                 = (Mat_MPIDense*)mat->data;
1365   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1366   mat->factor       = A->factor;
1367   mat->assembled    = PETSC_TRUE;
1368   mat->preallocated = PETSC_TRUE;
1369 
1370   mat->rmap.rstart     = A->rmap.rstart;
1371   mat->rmap.rend       = A->rmap.rend;
1372   a->size              = oldmat->size;
1373   a->rank              = oldmat->rank;
1374   mat->insertmode      = NOT_SET_VALUES;
1375   a->nvec              = oldmat->nvec;
1376   a->donotstash        = oldmat->donotstash;
1377 
1378   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1379   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1380   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1381 
1382   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1383   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1384   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1385   *newmat = mat;
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #include "petscsys.h"
1390 
1391 #undef __FUNCT__
1392 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1393 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1394 {
1395   PetscErrorCode ierr;
1396   PetscMPIInt    rank,size;
1397   PetscInt       *rowners,i,m,nz,j;
1398   PetscScalar    *array,*vals,*vals_ptr;
1399   MPI_Status     status;
1400 
1401   PetscFunctionBegin;
1402   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1403   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1404 
1405   /* determine ownership of all rows */
1406   m          = M/size + ((M % size) > rank);
1407   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1408   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1409   rowners[0] = 0;
1410   for (i=2; i<=size; i++) {
1411     rowners[i] += rowners[i-1];
1412   }
1413 
1414   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1415   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1416   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1417   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1418   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1419 
1420   if (!rank) {
1421     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1422 
1423     /* read in my part of the matrix numerical values  */
1424     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1425 
1426     /* insert into matrix by row (this is why we cannot read directly into the array) */
1427     vals_ptr = vals;
1428     for (i=0; i<m; i++) {
1429       for (j=0; j<N; j++) {
1430         array[i + j*m] = *vals_ptr++;
1431       }
1432     }
1433 
1434     /* read in other processors and ship out */
1435     for (i=1; i<size; i++) {
1436       nz   = (rowners[i+1] - rowners[i])*N;
1437       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1438       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1439     }
1440   } else {
1441     /* receive numeric values */
1442     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1443 
1444     /* receive message of values*/
1445     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1446 
1447     /* insert into matrix by row (this is why we cannot read directly into the array) */
1448     vals_ptr = vals;
1449     for (i=0; i<m; i++) {
1450       for (j=0; j<N; j++) {
1451         array[i + j*m] = *vals_ptr++;
1452       }
1453     }
1454   }
1455   ierr = PetscFree(rowners);CHKERRQ(ierr);
1456   ierr = PetscFree(vals);CHKERRQ(ierr);
       ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr); /* matches the MatGetArray() above */
1457   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatLoad_MPIDense"
1464 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1465 {
1466   Mat            A;
1467   PetscScalar    *vals,*svals;
1468   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1469   MPI_Status     status;
1470   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1471   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1472   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1473   PetscInt       i,nz,j,rstart,rend;
1474   int            fd;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1479   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1480   if (!rank) {
1481     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1482     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1483     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1484   }
1485 
1486   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1487   M = header[1]; N = header[2]; nz = header[3];
1488 
1489   /*
1490        Handle case where matrix is stored on disk as a dense matrix
1491   */
1492   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1493     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1494     PetscFunctionReturn(0);
1495   }
1496 
1497   /* determine ownership of all rows */
1498   m          = M/size + ((M % size) > rank);
1499   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1500   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1501   rowners[0] = 0;
1502   for (i=2; i<=size; i++) {
1503     rowners[i] += rowners[i-1];
1504   }
1505   rstart = rowners[rank];
1506   rend   = rowners[rank+1];
1507 
1508   /* distribute row lengths to all processors */
1509   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1510   offlens = ourlens + (rend-rstart);
1511   if (!rank) {
1512     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1513     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1514     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1515     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1516     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1517     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1518   } else {
1519     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1520   }
1521 
1522   if (!rank) {
1523     /* calculate the number of nonzeros on each processor */
1524     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1525     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1526     for (i=0; i<size; i++) {
1527       for (j=rowners[i]; j< rowners[i+1]; j++) {
1528         procsnz[i] += rowlengths[j];
1529       }
1530     }
1531     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1532 
1533     /* determine max buffer needed and allocate it */
1534     maxnz = 0;
1535     for (i=0; i<size; i++) {
1536       maxnz = PetscMax(maxnz,procsnz[i]);
1537     }
1538     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1539 
1540     /* read in my part of the matrix column indices  */
1541     nz = procsnz[0];
1542     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1543     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1544 
1545     /* read in everyone else's part and ship it off */
1546     for (i=1; i<size; i++) {
1547       nz   = procsnz[i];
1548       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1549       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1550     }
1551     ierr = PetscFree(cols);CHKERRQ(ierr);
1552   } else {
1553     /* determine buffer space needed for message */
1554     nz = 0;
1555     for (i=0; i<m; i++) {
1556       nz += ourlens[i];
1557     }
1558     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1559 
1560     /* receive message of column indices*/
1561     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1562     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1563     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1564   }
1565 
1566   /* loop over local rows, determining number of off diagonal entries */
1567   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1568   jj = 0;
1569   for (i=0; i<m; i++) {
1570     for (j=0; j<ourlens[i]; j++) {
1571       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1572       jj++;
1573     }
1574   }
1575 
1576   /* create our matrix */
1577   for (i=0; i<m; i++) {
1578     ourlens[i] -= offlens[i];
1579   }
1580   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1581   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1582   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1583   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1584   A = *newmat;
1585   for (i=0; i<m; i++) {
1586     ourlens[i] += offlens[i];
1587   }
1588 
1589   if (!rank) {
1590     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1591 
1592     /* read in my part of the matrix numerical values  */
1593     nz = procsnz[0];
1594     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1595 
1596     /* insert into matrix */
1597     jj      = rstart;
1598     smycols = mycols;
1599     svals   = vals;
1600     for (i=0; i<m; i++) {
1601       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1602       smycols += ourlens[i];
1603       svals   += ourlens[i];
1604       jj++;
1605     }
1606 
1607     /* read in other processors and ship out */
1608     for (i=1; i<size; i++) {
1609       nz   = procsnz[i];
1610       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1611       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1612     }
1613     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1614   } else {
1615     /* receive numeric values */
1616     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1617 
1618     /* receive message of values*/
1619     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1620     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1621     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1622 
1623     /* insert into matrix */
1624     jj      = rstart;
1625     smycols = mycols;
1626     svals   = vals;
1627     for (i=0; i<m; i++) {
1628       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1629       smycols += ourlens[i];
1630       svals   += ourlens[i];
1631       jj++;
1632     }
1633   }
1634   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1635   ierr = PetscFree(vals);CHKERRQ(ierr);
1636   ierr = PetscFree(mycols);CHKERRQ(ierr);
1637   ierr = PetscFree(rowners);CHKERRQ(ierr);
1638 
1639   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1640   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatEqual_MPIDense"
1646 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1647 {
1648   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1649   Mat            a,b;
1650   PetscTruth     flg;
1651   PetscErrorCode ierr;
1652 
1653   PetscFunctionBegin;
1654   a = matA->A;
1655   b = matB->A;
1656   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1657   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 
1662 
1663