#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that stores this process's part of the operator; for a
              sequential dense matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) {  /* this check is fragile: a MATDENSE matrix on one process is really sequential */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
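
/*
   Example (a minimal usage sketch, not part of this file): pull out the local
   dense block of an assembled MPI dense matrix A and query its dimensions.
   Since the inner matrix is (local rows) x (global columns), MatGetSize() on
   it reports exactly that shape.

      Mat      Aloc;
      PetscInt m,n;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);
*/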

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
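
/*
   Example (a sketch): this routine is normally reached through the function
   composed under the name "MatGetDiagonalBlock_C" in MatCreate_MPIDense()
   below; the query-and-call pattern mirrors MatMPIDenseSetPreallocation().

      PetscErrorCode (*f)(Mat,PetscTruth*,MatReuse,Mat*);
      Mat            diag;
      PetscTruth     iscopy;
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",(void (**)(void))&f);CHKERRQ(ierr);
      if (f) {ierr = (*f)(A,&iscopy,MAT_INITIAL_MATRIX,&diag);CHKERRQ(ierr);}
*/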

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
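
/*
   Example (a minimal sketch): insert an entry by global indices; entries for
   rows owned by another process are stashed here and communicated during
   assembly, so the call works regardless of ownership.

      PetscInt    i = 0,j = 0;
      PetscScalar val = 1.0;
      ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/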

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK; currently only a submatrix with the same row partitioning
     as the original matrix is supported! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A directly rather than going through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
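
/*
   Example (a sketch): zero two global rows of A and put 1.0 on their diagonal
   entries; the rows may be owned by any process. As the note above warns, the
   diagonal insertion assumes a square matrix with matching row and column
   ownership.

      PetscInt rows[2] = {0,5};
      ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/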

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
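
/*
   Example (a sketch): y = A*x with vectors whose layouts conform to A;
   MatGetVecs() is assumed here to create compatibly partitioned vectors.

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/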

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); /* m entries, not n: this process's global rows of A become column indices in B */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
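
/*
   Example (a sketch): form an explicit transpose, or transpose a square
   matrix in place by passing PETSC_NULL for the output argument.

      Mat At;
      ierr = MatTranspose(A,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);
*/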

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d",A->cmap.n,B->rmap.n);
  ierr = MatCreate(B->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C = Cmat;
  PetscFunctionReturn(0);
}
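
/*
   Example (a sketch, assuming the generic MatMatMult() interface): this file
   also registers numeric support for C = A*B with A of type MPIAIJ and B of
   type MPIDENSE, via the functions composed in MatCreate_MPIDense() below.

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/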

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       0,
       MatCholeskyFactorSymbolic_MPIDense,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note: for now, when data is specified, we assume the user has correctly
     allocated the local dense storage space; error checking should be added. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
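
/*
   Example (a sketch): selecting the type programmatically rather than via
   -mat_type mpidense, for global sizes M and N.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
*/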

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                      MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                      MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                      MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
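
/*
   Example (a minimal sketch): let PETSc allocate the local storage; a
   user-supplied array must instead be column-major of size
   (local rows) x (global columns).

      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/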

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix, dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
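
/*
   Example (a minimal sketch): create a 100 x 100 parallel dense matrix,
   letting PETSc choose the row distribution and allocate the storage.

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
*/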

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart     = A->rmap.rstart;
  mat->rmap.rend       = A->rmap.rend;
  a->size              = oldmat->size;
  a->rank              = oldmat->rank;
  mat->insertmode      = NOT_SET_VALUES;
  a->nvec              = oldmat->nvec;
  a->donotstash        = oldmat->donotstash;

  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
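
/*
   Example (a sketch, assuming a binary file named "matrix.dat" written by
   MatView() with a binary viewer):

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/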

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1614