#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) {  /* this check is fragile: a MATDENSE matrix on a one-process communicator is really sequential */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
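
/*
   Example usage (a minimal sketch; assumes A is an already assembled dense
   matrix of type MATSEQDENSE or MATMPIDENSE):

     Mat Aloc;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);

   Aloc is then the local MATSEQDENSE storage; on one process it is A itself.
*/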

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
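
/*
   Layout note for the routine above (a sketch, not part of the public API):
   in row-oriented mode the logically two-dimensional input v is row-major,
   so the values for row i start at v + i*n; in column-oriented mode entry
   (i,j) lives at v[i + j*m].  Hypothetical calling pattern:

     PetscInt    rows[2] = {0,1},cols[3] = {0,1,2};
     PetscScalar vals[6] = {1,2,3,4,5,6};    row-major: row 0 = {1,2,3}
     ierr = MatSetValues(mat,2,rows,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Rows owned by other processes are cached in mat->stash and shipped during
   MatAssemblyBegin()/MatAssemblyEnd().
*/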

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->cmap.N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  Currently the submatrix must have the same row
     partitioning as the original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
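
/*
   A sketch of the calling sequence that exercises the stash logic above:

     ierr = MatSetValues(mat,...);CHKERRQ(ierr);                       any process, any rows
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    posts sends of stashed off-process rows
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);      receives them and calls MatSetValues_MPIDense()
*/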

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   the local matrix mdn->A directly rather than going through the
   MatZeroRows() routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
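
/*
   The multiply above is the usual two-phase pattern for this storage scheme:
   every process gathers the entire input vector x into its work vector lvec,
   then applies its local (rmap.n x cmap.N) dense block, so

     y_local = A_local * x_global

   The transpose routines below run the same pattern in reverse: a local
   transpose multiply followed by an ADD_VALUES scatter back onto the result.
*/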

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
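
/*
   Indexing note for the diagonal loop above: the local block is stored
   column-major with leading dimension m, so global diagonal entry rstart+i
   is local entry (i, rstart+i), i.e.

     v[(rstart+i)*m + i] = v[radd + i*m + i]   with  radd = rstart*m
*/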

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mat->rmap.rstart; m = mdn->A->rmap.n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
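
/*
   Storage note for the scaling loops above: the local block is column-major,
   so row i is the strided sequence v[i], v[i+m], v[i+2m], ... (left scaling),
   while column j is the contiguous block mat->v + j*m (right scaling).  The
   right scaling vector is distributed like the columns, hence the scatter
   into lvec before use.
*/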

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); /* m entries, since each call sets m column indices of B */
  for (i=0; i<m; i++) rwork[i] = rstart + i; /* independent of j, so fill it once */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap.n,n=B->cmap.n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
  ierr = MatCreate(B->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
  Cmat->assembled = PETSC_TRUE;
  *C = Cmat;
  PetscFunctionReturn(0);
}
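
/*
   Shape note for the symbolic product above: C inherits A's row layout and
   B's column layout, i.e. local size m x n and global size
   A->rmap.N x B->cmap.N, so C = A*B is conformal by construction.
*/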

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       0,
       MatCholeskyFactorSymbolic_MPIDense,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
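
/*
   Example (a minimal sketch of creating an MPI dense matrix by type name,
   mirroring the sequence used internally by MatView above):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/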

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapInitialize(mat->comm,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(mat->comm,&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                      MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                      MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                      MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
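
/*
   Example (a sketch; assumes nlocal = mat->rmap.n local rows, N global
   columns, and the column-major storage documented above -- a user-provided
   array like this is not freed by PETSc):

     PetscScalar *mydata;
     ierr = PetscMalloc(nlocal*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/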

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
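
/*
   Example (a minimal sketch letting PETSc choose both the layout and the storage):

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/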

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart     = A->rmap.rstart;
  mat->rmap.rend       = A->rmap.rend;
  a->size              = oldmat->size;
  a->rank              = oldmat->rank;
  mat->insertmode      = NOT_SET_VALUES;
  a->nvec              = oldmat->nvec;
  a->donotstash        = oldmat->donotstash;

  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->rmap.range);CHKERRQ(ierr);
  ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->cmap.range);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIDense"
PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* reduce the local comparisons so every process returns the global answer;
     previously flg was computed but *flag was never set */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1610 
1611 
1612