xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 18be62a5feccf172f7bc80c15c4be8f6d6443e8b)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatGetRow_MPIDense"
11 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
12 {
13   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
14   PetscErrorCode ierr;
15   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;
16 
17   PetscFunctionBegin;
18   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
19   lrow = row - rstart;
20   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
21   PetscFunctionReturn(0);
22 }
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatRestoreRow_MPIDense"
26 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
27 {
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
32   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
33   PetscFunctionReturn(0);
34 }
35 
36 EXTERN_C_BEGIN
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
39 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
40 {
41   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt          m = A->m,rstart = mdn->rstart;
44   PetscScalar  *array;
45   MPI_Comm     comm;
46 
47   PetscFunctionBegin;
48   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
49 
50   /* The reuse aspect is not implemented efficiently */
51   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
52 
53   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
54   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
55   ierr = MatCreate(comm,B);CHKERRQ(ierr);
56   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
57   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
58   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
59   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
60   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
61   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
62 
63   *iscopy = PETSC_TRUE;
64   PetscFunctionReturn(0);
65 }
66 EXTERN_C_END
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatSetValues_MPIDense"
70 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
71 {
72   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
73   PetscErrorCode ierr;
74   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
75   PetscTruth   roworiented = A->roworiented;
76 
77   PetscFunctionBegin;
78   for (i=0; i<m; i++) {
79     if (idxm[i] < 0) continue;
80     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
81     if (idxm[i] >= rstart && idxm[i] < rend) {
82       row = idxm[i] - rstart;
83       if (roworiented) {
84         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
85       } else {
86         for (j=0; j<n; j++) {
87           if (idxn[j] < 0) continue;
88           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
89           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
90         }
91       }
92     } else {
93       if (!A->donotstash) {
94         if (roworiented) {
95           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
96         } else {
97           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
98         }
99       }
100     }
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetValues_MPIDense"
107 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
108 {
109   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
112 
113   PetscFunctionBegin;
114   for (i=0; i<m; i++) {
115     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
116     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
117     if (idxm[i] >= rstart && idxm[i] < rend) {
118       row = idxm[i] - rstart;
119       for (j=0; j<n; j++) {
120         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
121         if (idxn[j] >= mat->N) {
122           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
123         }
124         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
125       }
126     } else {
127       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
128     }
129   }
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatGetArray_MPIDense"
135 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
136 {
137   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
147 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
148 {
149   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
150   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
151   PetscErrorCode ierr;
152   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
153   PetscScalar  *av,*bv,*v = lmat->v;
154   Mat          newmat;
155 
156   PetscFunctionBegin;
157   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
158   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
159   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
160   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
161 
162   /* No parallel redistribution is currently supported! We should really check each index set
163      to confirm that it is OK; currently the submatrix must have the same row partitioning as the
164      original matrix (see the usage sketch after this routine). */
165 
166   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
167   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
168 
169   /* Check submatrix call */
170   if (scall == MAT_REUSE_MATRIX) {
171     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
172     /* Really need to test the row and column sizes! */
173     newmat = *B;
174   } else {
175     /* Create and fill new matrix */
176     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
177     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
178     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
179     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
180   }
181 
182   /* Now extract the data pointers and do the copy, column at a time */
183   newmatd = (Mat_MPIDense*)newmat->data;
184   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
185 
186   for (i=0; i<ncols; i++) {
187     av = v + nlrows*icol[i];
188     for (j=0; j<nrows; j++) {
189       *bv++ = av[irow[j] - rstart];
190     }
191   }
192 
193   /* Assemble the matrices so that the correct flags are set */
194   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
195   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196 
197   /* Free work space */
198   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
199   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
200   *B = newmat;
201   PetscFunctionReturn(0);
202 }
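
/*
   Usage sketch for the routine above (illustrative only, not part of the library).
   Because no parallel redistribution is supported, the row index set must match the
   caller's ownership range.  The helper name ExampleGetLocalColumns and the use of
   stride index sets are assumptions for the example, not requirements of the interface.
*/
#if 0
static PetscErrorCode ExampleGetLocalColumns(Mat A,PetscInt firstcol,PetscInt ncol,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             isrow,iscol;

  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  /* rows: exactly the locally owned rows, in order */
  ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  /* columns: a contiguous block of global columns, the same on every process */
  ierr = ISCreateStride(PETSC_COMM_SELF,ncol,firstcol,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  return 0;
}
#endif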
203 
204 #undef __FUNCT__
205 #define __FUNCT__ "MatRestoreArray_MPIDense"
206 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
207 {
208   PetscFunctionBegin;
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
214 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
215 {
216   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
217   MPI_Comm     comm = mat->comm;
218   PetscErrorCode ierr;
219   PetscInt          nstash,reallocs;
220   InsertMode   addv;
221 
222   PetscFunctionBegin;
223   /* make sure all processors are either in INSERTMODE or ADDMODE: INSERT_VALUES and ADD_VALUES are distinct bits, so their bitwise OR over all processes equals ADD_VALUES|INSERT_VALUES exactly when the two modes are mixed */
224   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
225   if (addv == (ADD_VALUES|INSERT_VALUES)) {
226     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
227   }
228   mat->insertmode = addv; /* in case this processor had no cache */
229 
230   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
231   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
232   ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
238 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
239 {
240   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
241   PetscErrorCode  ierr;
242   PetscInt        i,*row,*col,flg,j,rstart,ncols;
243   PetscMPIInt     n;
244   PetscScalar       *val;
245   InsertMode      addv=mat->insertmode;
246 
247   PetscFunctionBegin;
248   /*  wait on receives */
249   while (1) {
250     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
251     if (!flg) break;
252 
253     for (i=0; i<n;) {
254       /* Now identify the consecutive vals belonging to the same row */
255       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
256       if (j < n) ncols = j-i;
257       else       ncols = n-i;
258       /* Now assemble all these values with a single function call */
259       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
260       i = j;
261     }
262   }
263   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
264 
265   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
266   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
267 
268   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
269     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatZeroEntries_MPIDense"
276 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
277 {
278   PetscErrorCode ierr;
279   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
280 
281   PetscFunctionBegin;
282   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 /* The code does not handle the diagonal entries correctly unless the
287    matrix is square and the column and row ownerships are identical.
288    This is a BUG. The only way to fix it seems to be to access
289    mdn->A and mdn->B directly and not go through the MatZeroRows()
290    routine. A usage sketch follows the routine below.
291 */
292 #undef __FUNCT__
293 #define __FUNCT__ "MatZeroRows_MPIDense"
294 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
295 {
296   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
297   PetscErrorCode ierr;
298   PetscInt       i,*owners = l->rowners;
299   PetscInt       *nprocs,j,idx,nsends;
300   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
301   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
302   PetscInt       *lens,*lrows,*values;
303   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
304   MPI_Comm       comm = A->comm;
305   MPI_Request    *send_waits,*recv_waits;
306   MPI_Status     recv_status,*send_status;
307   PetscTruth     found;
308 
309   PetscFunctionBegin;
310   /*  first count number of contributors to each processor */
311   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
312   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
313   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
314   for (i=0; i<N; i++) {
315     idx = rows[i];
316     found = PETSC_FALSE;
317     for (j=0; j<size; j++) {
318       if (idx >= owners[j] && idx < owners[j+1]) {
319         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
320       }
321     }
322     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
323   }
324   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
325 
326   /* inform other processors of number of messages and max length*/
327   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
328 
329   /* post receives:   */
330   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
331   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
332   for (i=0; i<nrecvs; i++) {
333     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
334   }
335 
336   /* do sends:
337       1) starts[i] gives the starting index in svalues for stuff going to
338          the ith processor
339   */
340   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
341   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
342   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
343   starts[0]  = 0;
344   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
345   for (i=0; i<N; i++) {
346     svalues[starts[owner[i]]++] = rows[i];
347   }
348 
349   starts[0] = 0;
350   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
351   count = 0;
352   for (i=0; i<size; i++) {
353     if (nprocs[2*i+1]) {
354       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
355     }
356   }
357   ierr = PetscFree(starts);CHKERRQ(ierr);
358 
359   base = owners[rank];
360 
361   /*  wait on receives */
362   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
363   source = lens + nrecvs;
364   count  = nrecvs; slen = 0;
365   while (count) {
366     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
367     /* unpack receives into our local space */
368     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
369     source[imdex]  = recv_status.MPI_SOURCE;
370     lens[imdex]    = n;
371     slen += n;
372     count--;
373   }
374   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
375 
376   /* move the data into the send scatter */
377   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
378   count = 0;
379   for (i=0; i<nrecvs; i++) {
380     values = rvalues + i*nmax;
381     for (j=0; j<lens[i]; j++) {
382       lrows[count++] = values[j] - base;
383     }
384   }
385   ierr = PetscFree(rvalues);CHKERRQ(ierr);
386   ierr = PetscFree(lens);CHKERRQ(ierr);
387   ierr = PetscFree(owner);CHKERRQ(ierr);
388   ierr = PetscFree(nprocs);CHKERRQ(ierr);
389 
390   /* actually zap the local rows */
391   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
392   ierr = PetscFree(lrows);CHKERRQ(ierr);
393 
394   /* wait on sends */
395   if (nsends) {
396     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
397     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
398     ierr = PetscFree(send_status);CHKERRQ(ierr);
399   }
400   ierr = PetscFree(send_waits);CHKERRQ(ierr);
401   ierr = PetscFree(svalues);CHKERRQ(ierr);
402 
403   PetscFunctionReturn(0);
404 }
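
/*
   Usage sketch for MatZeroRows() on an MPI dense matrix (illustrative only, not part of
   the library).  Rows may be specified by any process; they are routed to their owners
   by the routine above.  Per the note preceding the routine, placing diag on the diagonal
   is only reliable for square matrices whose row and column ownerships are identical.
   The helper name is an assumption for the example.
*/
#if 0
static PetscErrorCode ExampleZeroFirstTwoRows(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rows[2] = {0,1};   /* global row indices to zero */
  PetscScalar    diag    = 1.0;     /* value placed on the diagonal of the zeroed rows */

  ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
  return 0;
}
#endif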
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatMult_MPIDense"
408 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
409 {
410   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
411   PetscErrorCode ierr;
412 
413   PetscFunctionBegin;
414   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
415   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
416   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatMultAdd_MPIDense"
422 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
423 {
424   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
425   PetscErrorCode ierr;
426 
427   PetscFunctionBegin;
428   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
429   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
430   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatMultTranspose_MPIDense"
436 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
439   PetscErrorCode ierr;
440   PetscScalar  zero = 0.0;
441 
442   PetscFunctionBegin;
443   ierr = VecSet(yy,zero);CHKERRQ(ierr);
444   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
445   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
452 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
453 {
454   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
455   PetscErrorCode ierr;
456 
457   PetscFunctionBegin;
458   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
459   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
460   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
461   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "MatGetDiagonal_MPIDense"
467 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
468 {
469   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
470   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
471   PetscErrorCode ierr;
472   PetscInt          len,i,n,m = A->m,radd;
473   PetscScalar  *x,zero = 0.0;
474 
475   PetscFunctionBegin;
476   ierr = VecSet(v,zero);CHKERRQ(ierr);
477   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
478   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
479   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
480   len  = PetscMin(a->A->m,a->A->n);
481   radd = a->rstart*m;
482   for (i=0; i<len; i++) {
483     x[i] = aloc->v[radd + i*m + i];
484   }
485   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNCT__
490 #define __FUNCT__ "MatDestroy_MPIDense"
491 PetscErrorCode MatDestroy_MPIDense(Mat mat)
492 {
493   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
494   PetscErrorCode ierr;
495 
496   PetscFunctionBegin;
497 
498 #if defined(PETSC_USE_LOG)
499   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
500 #endif
501   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
502   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
503   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
504   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
505   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
506   if (mdn->factor) {
507     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
508     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
509     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
510     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
511   }
512   ierr = PetscFree(mdn);CHKERRQ(ierr);
513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "MatView_MPIDense_Binary"
520 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
521 {
522   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
523   PetscErrorCode ierr;
524 
525   PetscFunctionBegin;
526   if (mdn->size == 1) {
527     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
528   }
529   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
530   PetscFunctionReturn(0);
531 }
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
535 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
536 {
537   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
538   PetscErrorCode    ierr;
539   PetscMPIInt       size = mdn->size,rank = mdn->rank;
540   PetscViewerType   vtype;
541   PetscTruth        iascii,isdraw;
542   PetscViewer       sviewer;
543   PetscViewerFormat format;
544 
545   PetscFunctionBegin;
546   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
547   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
548   if (iascii) {
549     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
550     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
551     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
552       MatInfo info;
553       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
554       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
555                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
556       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
557       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
558       PetscFunctionReturn(0);
559     } else if (format == PETSC_VIEWER_ASCII_INFO) {
560       PetscFunctionReturn(0);
561     }
562   } else if (isdraw) {
563     PetscDraw       draw;
564     PetscTruth isnull;
565 
566     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
567     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
568     if (isnull) PetscFunctionReturn(0);
569   }
570 
571   if (size == 1) {
572     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
573   } else {
574     /* assemble the entire matrix onto first processor. */
575     Mat         A;
576     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
577     PetscInt    *cols;
578     PetscScalar   *vals;
579 
580     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
581     if (!rank) {
582       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
583     } else {
584       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
585     }
586     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
587     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
588     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
589     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
590 
591     /* Copy the matrix ... This isn't the most efficient means,
592        but it's quick for now */
593     A->insertmode = INSERT_VALUES;
594     row = mdn->rstart; m = mdn->A->m;
595     for (i=0; i<m; i++) {
596       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
597       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
598       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
599       row++;
600     }
601 
602     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
603     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
604     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
605     if (!rank) {
606       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
607     }
608     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
609     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
610     ierr = MatDestroy(A);CHKERRQ(ierr);
611   }
612   PetscFunctionReturn(0);
613 }
614 
615 #undef __FUNCT__
616 #define __FUNCT__ "MatView_MPIDense"
617 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
618 {
619   PetscErrorCode ierr;
620   PetscTruth iascii,isbinary,isdraw,issocket;
621 
622   PetscFunctionBegin;
623 
624   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
625   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
626   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
627   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
628 
629   if (iascii || issocket || isdraw) {
630     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
631   } else if (isbinary) {
632     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
633   } else {
634     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
635   }
636   PetscFunctionReturn(0);
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatGetInfo_MPIDense"
641 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
642 {
643   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
644   Mat          mdn = mat->A;
645   PetscErrorCode ierr;
646   PetscReal    isend[5],irecv[5];
647 
648   PetscFunctionBegin;
649   info->rows_global    = (double)A->M;
650   info->columns_global = (double)A->N;
651   info->rows_local     = (double)A->m;
652   info->columns_local  = (double)A->N;
653   info->block_size     = 1.0;
654   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
655   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
656   isend[3] = info->memory;  isend[4] = info->mallocs;
657   if (flag == MAT_LOCAL) {
658     info->nz_used      = isend[0];
659     info->nz_allocated = isend[1];
660     info->nz_unneeded  = isend[2];
661     info->memory       = isend[3];
662     info->mallocs      = isend[4];
663   } else if (flag == MAT_GLOBAL_MAX) {
664     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
665     info->nz_used      = irecv[0];
666     info->nz_allocated = irecv[1];
667     info->nz_unneeded  = irecv[2];
668     info->memory       = irecv[3];
669     info->mallocs      = irecv[4];
670   } else if (flag == MAT_GLOBAL_SUM) {
671     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
672     info->nz_used      = irecv[0];
673     info->nz_allocated = irecv[1];
674     info->nz_unneeded  = irecv[2];
675     info->memory       = irecv[3];
676     info->mallocs      = irecv[4];
677   }
678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
679   info->fill_ratio_needed = 0;
680   info->factor_mallocs    = 0;
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatSetOption_MPIDense"
686 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
687 {
688   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
689   PetscErrorCode ierr;
690 
691   PetscFunctionBegin;
692   switch (op) {
693   case MAT_NO_NEW_NONZERO_LOCATIONS:
694   case MAT_YES_NEW_NONZERO_LOCATIONS:
695   case MAT_NEW_NONZERO_LOCATION_ERR:
696   case MAT_NEW_NONZERO_ALLOCATION_ERR:
697   case MAT_COLUMNS_SORTED:
698   case MAT_COLUMNS_UNSORTED:
699     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
700     break;
701   case MAT_ROW_ORIENTED:
702     a->roworiented = PETSC_TRUE;
703     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
704     break;
705   case MAT_ROWS_SORTED:
706   case MAT_ROWS_UNSORTED:
707   case MAT_YES_NEW_DIAGONALS:
708   case MAT_USE_HASH_TABLE:
709     ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
710     break;
711   case MAT_COLUMN_ORIENTED:
712     a->roworiented = PETSC_FALSE;
713     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
714     break;
715   case MAT_IGNORE_OFF_PROC_ENTRIES:
716     a->donotstash = PETSC_TRUE;
717     break;
718   case MAT_NO_NEW_DIAGONALS:
719     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
720   case MAT_SYMMETRIC:
721   case MAT_STRUCTURALLY_SYMMETRIC:
722   case MAT_NOT_SYMMETRIC:
723   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
724   case MAT_HERMITIAN:
725   case MAT_NOT_HERMITIAN:
726   case MAT_SYMMETRY_ETERNAL:
727   case MAT_NOT_SYMMETRY_ETERNAL:
728     break;
729   default:
730     SETERRQ(PETSC_ERR_SUP,"unknown option");
731   }
732   PetscFunctionReturn(0);
733 }
734 
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatDiagonalScale_MPIDense"
738 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
739 {
740   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
741   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
742   PetscScalar  *l,*r,x,*v;
743   PetscErrorCode ierr;
744   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
745 
746   PetscFunctionBegin;
747   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
748   if (ll) {
749     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
750     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
751     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
752     for (i=0; i<m; i++) {
753       x = l[i];
754       v = mat->v + i;
755       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
756     }
757     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
758     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
759   }
760   if (rr) {
761     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
762     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
763     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
764     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
765     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
766     for (i=0; i<n; i++) {
767       x = r[i];
768       v = mat->v + i*m;
769       for (j=0; j<m; j++) { (*v++) *= x;}
770     }
771     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
772     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 #undef __FUNCT__
778 #define __FUNCT__ "MatNorm_MPIDense"
779 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
780 {
781   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
782   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
783   PetscErrorCode ierr;
784   PetscInt          i,j;
785   PetscReal    sum = 0.0;
786   PetscScalar  *v = mat->v;
787 
788   PetscFunctionBegin;
789   if (mdn->size == 1) {
790     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
791   } else {
792     if (type == NORM_FROBENIUS) {
793       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
794 #if defined(PETSC_USE_COMPLEX)
795         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
796 #else
797         sum += (*v)*(*v); v++;
798 #endif
799       }
800       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
801       *nrm = sqrt(*nrm);
802       ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
803     } else if (type == NORM_1) {
804       PetscReal *tmp,*tmp2;
805       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
806       tmp2 = tmp + A->N;
807       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
808       *nrm = 0.0;
809       v = mat->v;
810       for (j=0; j<mdn->A->n; j++) {
811         for (i=0; i<mdn->A->m; i++) {
812           tmp[j] += PetscAbsScalar(*v);  v++;
813         }
814       }
815       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
816       for (j=0; j<A->N; j++) {
817         if (tmp2[j] > *nrm) *nrm = tmp2[j];
818       }
819       ierr = PetscFree(tmp);CHKERRQ(ierr);
820       ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
821     } else if (type == NORM_INFINITY) { /* max row norm */
822       PetscReal ntemp;
823       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
824       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
825     } else {
826       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
827     }
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatTranspose_MPIDense"
834 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
835 {
836   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
837   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
838   Mat            B;
839   PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
840   PetscErrorCode ierr;
841   PetscInt       j,i;
842   PetscScalar      *v;
843 
844   PetscFunctionBegin;
845   if (!matout && M != N) {
846     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
847   }
848   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
849   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
850   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
851   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
852 
853   m = a->A->m; n = a->A->n; v = Aloc->v;
854   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); /* holds the m global row indices used below */
855   for (j=0; j<n; j++) {
856     for (i=0; i<m; i++) rwork[i] = rstart + i;
857     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
858     v   += m;
859   }
860   ierr = PetscFree(rwork);CHKERRQ(ierr);
861   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
862   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
863   if (matout) {
864     *matout = B;
865   } else {
866     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
867   }
868   PetscFunctionReturn(0);
869 }
870 
871 #include "petscblaslapack.h"
872 #undef __FUNCT__
873 #define __FUNCT__ "MatScale_MPIDense"
874 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
875 {
876   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
877   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
878   PetscScalar  oalpha = alpha;
879   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
880   PetscErrorCode ierr;
881 
882   PetscFunctionBegin;
883   BLASscal_(&nz,&oalpha,a->v,&one);
884   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
885   PetscFunctionReturn(0);
886 }
887 
888 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
892 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
893 {
894   PetscErrorCode ierr;
895 
896   PetscFunctionBegin;
897   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 /* -------------------------------------------------------------------*/
902 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
903        MatGetRow_MPIDense,
904        MatRestoreRow_MPIDense,
905        MatMult_MPIDense,
906 /* 4*/ MatMultAdd_MPIDense,
907        MatMultTranspose_MPIDense,
908        MatMultTransposeAdd_MPIDense,
909        0,
910        0,
911        0,
912 /*10*/ 0,
913        0,
914        0,
915        0,
916        MatTranspose_MPIDense,
917 /*15*/ MatGetInfo_MPIDense,
918        0,
919        MatGetDiagonal_MPIDense,
920        MatDiagonalScale_MPIDense,
921        MatNorm_MPIDense,
922 /*20*/ MatAssemblyBegin_MPIDense,
923        MatAssemblyEnd_MPIDense,
924        0,
925        MatSetOption_MPIDense,
926        MatZeroEntries_MPIDense,
927 /*25*/ MatZeroRows_MPIDense,
928        0,
929        0,
930        0,
931        0,
932 /*30*/ MatSetUpPreallocation_MPIDense,
933        0,
934        0,
935        MatGetArray_MPIDense,
936        MatRestoreArray_MPIDense,
937 /*35*/ MatDuplicate_MPIDense,
938        0,
939        0,
940        0,
941        0,
942 /*40*/ 0,
943        MatGetSubMatrices_MPIDense,
944        0,
945        MatGetValues_MPIDense,
946        0,
947 /*45*/ 0,
948        MatScale_MPIDense,
949        0,
950        0,
951        0,
952 /*50*/ 0,
953        0,
954        0,
955        0,
956        0,
957 /*55*/ 0,
958        0,
959        0,
960        0,
961        0,
962 /*60*/ MatGetSubMatrix_MPIDense,
963        MatDestroy_MPIDense,
964        MatView_MPIDense,
965        MatGetPetscMaps_Petsc,
966        0,
967 /*65*/ 0,
968        0,
969        0,
970        0,
971        0,
972 /*70*/ 0,
973        0,
974        0,
975        0,
976        0,
977 /*75*/ 0,
978        0,
979        0,
980        0,
981        0,
982 /*80*/ 0,
983        0,
984        0,
985        0,
986 /*84*/ MatLoad_MPIDense,
987        0,
988        0,
989        0,
990        0,
991        0,
992 /*90*/ 0,
993        0,
994        0,
995        0,
996        0,
997 /*95*/ 0,
998        0,
999        0,
1000        0};
1001 
1002 EXTERN_C_BEGIN
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1005 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1006 {
1007   Mat_MPIDense *a;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   mat->preallocated = PETSC_TRUE;
1012   /* Note:  For now, when data is provided, this assumes the user has correctly
1013    allocated the local dense storage space.  We should add error checking. */
1014 
1015   a    = (Mat_MPIDense*)mat->data;
1016   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1017   ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
1018   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1019   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1020   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 EXTERN_C_END
1024 
1025 /*MC
1026    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1027 
1028    Options Database Keys:
1029 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1030 
1031   Level: beginner
1032 
1033 .seealso: MatCreateMPIDense
1034 M*/
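
/*
   A minimal creation sketch for MATMPIDENSE (illustrative only, not part of the library;
   the helper name is an assumption for the example).
*/
#if 0
static PetscErrorCode ExampleCreateMPIDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,PETSC_NULL);CHKERRQ(ierr); /* let PETSc manage the storage */
  return 0;
}
#endif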
1035 
1036 EXTERN_C_BEGIN
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatCreate_MPIDense"
1039 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1040 {
1041   Mat_MPIDense *a;
1042   PetscErrorCode ierr;
1043   PetscInt          i;
1044 
1045   PetscFunctionBegin;
1046   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1047   mat->data         = (void*)a;
1048   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1049   mat->factor       = 0;
1050   mat->mapping      = 0;
1051 
1052   a->factor       = 0;
1053   mat->insertmode = NOT_SET_VALUES;
1054   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1055   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1056 
1057   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1058   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1059   a->nvec = mat->n;
1060 
1061   /* the information in the maps duplicates the information computed below, eventually
1062      we should remove the duplicate information that is not contained in the maps */
1063   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1064   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1065 
1066   /* build local table of row and column ownerships */
1067   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1068   a->cowners = a->rowners + a->size + 1;
1069   ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1070   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1071   a->rowners[0] = 0;
1072   for (i=2; i<=a->size; i++) {
1073     a->rowners[i] += a->rowners[i-1];
1074   }
1075   a->rstart = a->rowners[a->rank];
1076   a->rend   = a->rowners[a->rank+1];
1077   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1078   a->cowners[0] = 0;
1079   for (i=2; i<=a->size; i++) {
1080     a->cowners[i] += a->cowners[i-1];
1081   }
1082 
1083   /* build cache (stash) for entries destined for other processes */
1084   a->donotstash = PETSC_FALSE;
1085   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1086 
1087   /* stuff used for matrix vector multiply */
1088   a->lvec        = 0;
1089   a->Mvctx       = 0;
1090   a->roworiented = PETSC_TRUE;
1091 
1092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1093                                      "MatGetDiagonalBlock_MPIDense",
1094                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1096                                      "MatMPIDenseSetPreallocation_MPIDense",
1097                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 EXTERN_C_END
1101 
1102 /*MC
1103    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1104 
1105    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1106    and MATMPIDENSE otherwise.
1107 
1108    Options Database Keys:
1109 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1110 
1111   Level: beginner
1112 
1113 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1114 M*/
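
/*
   Sketch of selecting MATDENSE from the options database (illustrative only, not part of
   the library).  With -mat_type dense the matrix below becomes MATSEQDENSE on one process
   and MATMPIDENSE otherwise, as described above.  The helper name is an assumption.
*/
#if 0
static PetscErrorCode ExampleCreateDenseFromOptions(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATDENSE);CHKERRQ(ierr);   /* default type; may be overridden below */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);     /* honors -mat_type dense, -mat_type mpidense, ... */
  return 0;
}
#endif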
1115 
1116 EXTERN_C_BEGIN
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatCreate_Dense"
1119 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1120 {
1121   PetscErrorCode ierr;
1122   PetscMPIInt    size;
1123 
1124   PetscFunctionBegin;
1125   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1127   if (size == 1) {
1128     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1129   } else {
1130     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1131   }
1132   PetscFunctionReturn(0);
1133 }
1134 EXTERN_C_END
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1138 /*@C
1139    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1140 
1141    Not collective
1142 
1143    Input Parameters:
1144 +  A - the matrix
1145 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1146    to control all matrix memory allocation.
1147 
1148    Notes:
1149    The dense format is fully compatible with standard Fortran 77
1150    storage by columns.
1151 
1152    The data input variable is intended primarily for Fortran programmers
1153    who wish to allocate their own matrix memory space.  Most users should
1154    set data=PETSC_NULL.
1155 
1156    Level: intermediate
1157 
1158 .keywords: matrix,dense, parallel
1159 
1160 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1161 @*/
1162 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1163 {
1164   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1165 
1166   PetscFunctionBegin;
1167   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1168   if (f) {
1169     ierr = (*f)(mat,data);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
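
/*
   Sketch of supplying user-allocated storage through the data argument (illustrative
   only, not part of the library).  The array must hold the local rows times the global
   columns, stored by columns, and remains owned by the caller; most users should simply
   pass PETSC_NULL.  The helper name is an assumption for the example.
*/
#if 0
static PetscErrorCode ExamplePreallocateWithUserArray(MPI_Comm comm,PetscInt m,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;

  /* data is assumed to point to at least m*N PetscScalars allocated by the caller */
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  return 0;
}
#endif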
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatCreateMPIDense"
1176 /*@C
1177    MatCreateMPIDense - Creates a parallel matrix in dense format.
1178 
1179    Collective on MPI_Comm
1180 
1181    Input Parameters:
1182 +  comm - MPI communicator
1183 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1184 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1185 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1186 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1187 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1188    to control all matrix memory allocation.
1189 
1190    Output Parameter:
1191 .  A - the matrix
1192 
1193    Notes:
1194    The dense format is fully compatible with standard Fortran 77
1195    storage by columns.
1196 
1197    The data input variable is intended primarily for Fortran programmers
1198    who wish to allocate their own matrix memory space.  Most users should
1199    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1200 
1201    The user MUST specify either the local or global matrix dimensions
1202    (possibly both).
1203 
1204    Level: intermediate
1205 
1206 .keywords: matrix,dense, parallel
1207 
1208 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1209 @*/
1210 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1211 {
1212   PetscErrorCode ierr;
1213   PetscMPIInt    size;
1214 
1215   PetscFunctionBegin;
1216   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1217   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1218   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1219   if (size > 1) {
1220     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1221     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1222   } else {
1223     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1224     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1225   }
1226   PetscFunctionReturn(0);
1227 }
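
/*
   Usage sketch for MatCreateMPIDense() (illustrative only, not part of the library):
   let PETSc choose the local sizes and manage the storage, then fill the locally owned
   rows with MatSetValues().  The helper name and the example values are assumptions.
*/
#if 0
static PetscErrorCode ExampleFillMPIDense(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend;
  PetscScalar    v;

  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = (PetscScalar)(i + j);   /* arbitrary example values */
      ierr = MatSetValues(*A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif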
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatDuplicate_MPIDense"
1231 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1232 {
1233   Mat          mat;
1234   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   *newmat       = 0;
1239   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1240   ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
1241   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1242   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1243   mat->data         = (void*)a;
1244   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1245   mat->factor       = A->factor;
1246   mat->assembled    = PETSC_TRUE;
1247   mat->preallocated = PETSC_TRUE;
1248 
1249   a->rstart       = oldmat->rstart;
1250   a->rend         = oldmat->rend;
1251   a->size         = oldmat->size;
1252   a->rank         = oldmat->rank;
1253   mat->insertmode = NOT_SET_VALUES;
1254   a->nvec         = oldmat->nvec;
1255   a->donotstash   = oldmat->donotstash;
1256   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1257   ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1258   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1259   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1260 
1261   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1262   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1263   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1264   *newmat = mat;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #include "petscsys.h"
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1272 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1273 {
1274   PetscErrorCode ierr;
1275   PetscMPIInt    rank,size;
1276   PetscInt            *rowners,i,m,nz,j;
1277   PetscScalar    *array,*vals,*vals_ptr;
1278   MPI_Status     status;
1279 
1280   PetscFunctionBegin;
1281   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1282   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1283 
1284   /* determine ownership of all rows */
1285   m          = M/size + ((M % size) > rank);
1286   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1287   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1288   rowners[0] = 0;
1289   for (i=2; i<=size; i++) {
1290     rowners[i] += rowners[i-1];
1291   }
1292 
1293   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1294   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1295   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1296   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1297   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1298 
1299   if (!rank) {
1300     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1301 
1302     /* read in my part of the matrix numerical values  */
1303     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1304 
1305     /* insert into the matrix by row (this is why we cannot read directly into array) */
1306     vals_ptr = vals;
1307     for (i=0; i<m; i++) {
1308       for (j=0; j<N; j++) {
1309         array[i + j*m] = *vals_ptr++;
1310       }
1311     }
1312 
1313     /* read in the other processors' parts and ship them out */
1314     for (i=1; i<size; i++) {
1315       nz   = (rowners[i+1] - rowners[i])*N;
1316       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1317       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1318     }
1319   } else {
1320     /* receive numeric values */
1321     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1322 
1323     /* receive message of values*/
1324     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1325 
1326     /* insert into the matrix by row (this is why we cannot read directly into array) */
1327     vals_ptr = vals;
1328     for (i=0; i<m; i++) {
1329       for (j=0; j<N; j++) {
1330         array[i + j*m] = *vals_ptr++;
1331       }
1332     }
1333   }
1334   ierr = PetscFree(rowners);CHKERRQ(ierr);
1335   ierr = PetscFree(vals);CHKERRQ(ierr);
1336   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1337   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatLoad_MPIDense"
1343 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1344 {
1345   Mat            A;
1346   PetscScalar    *vals,*svals;
1347   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1348   MPI_Status     status;
1349   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1350   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1351   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1352   PetscInt       i,nz,j,rstart,rend;
1353   int            fd;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1358   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1359   if (!rank) {
1360     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1361     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1362     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1363   }
1364 
1365   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1366   M = header[1]; N = header[2]; nz = header[3];
1367 
1368   /*
1369        Handle case where matrix is stored on disk as a dense matrix
1370   */
1371   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1372     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1373     PetscFunctionReturn(0);
1374   }
1375 
1376   /* determine ownership of all rows */
1377   m          = M/size + ((M % size) > rank);
1378   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1379   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1380   rowners[0] = 0;
1381   for (i=2; i<=size; i++) {
1382     rowners[i] += rowners[i-1];
1383   }
1384   rstart = rowners[rank];
1385   rend   = rowners[rank+1];
1386 
1387   /* distribute row lengths to all processors */
1388   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1389   offlens = ourlens + (rend-rstart);
1390   if (!rank) {
1391     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1392     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1393     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1394     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1395     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1396     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1397   } else {
1398     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1399   }
1400 
1401   if (!rank) {
1402     /* calculate the number of nonzeros on each processor */
1403     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1404     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1405     for (i=0; i<size; i++) {
1406       for (j=rowners[i]; j< rowners[i+1]; j++) {
1407         procsnz[i] += rowlengths[j];
1408       }
1409     }
1410     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1411 
1412     /* determine max buffer needed and allocate it */
1413     maxnz = 0;
1414     for (i=0; i<size; i++) {
1415       maxnz = PetscMax(maxnz,procsnz[i]);
1416     }
1417     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1418 
1419     /* read in my part of the matrix column indices  */
1420     nz = procsnz[0];
1421     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1422     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1423 
1424     /* read in everyone else's part and ship it off */
1425     for (i=1; i<size; i++) {
1426       nz   = procsnz[i];
1427       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1428       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1429     }
1430     ierr = PetscFree(cols);CHKERRQ(ierr);
1431   } else {
1432     /* determine buffer space needed for message */
1433     nz = 0;
1434     for (i=0; i<m; i++) {
1435       nz += ourlens[i];
1436     }
1437     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1438 
1439     /* receive message of column indices*/
1440     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1441     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1442     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1443   }
1444 
1445   /* loop over local rows, determining number of off diagonal entries */
1446   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1447   jj = 0;
1448   for (i=0; i<m; i++) {
1449     for (j=0; j<ourlens[i]; j++) {
1450       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1451       jj++;
1452     }
1453   }
1454 
1455   /* create our matrix */
1456   for (i=0; i<m; i++) {
1457     ourlens[i] -= offlens[i];
1458   }
1459   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1460   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1461   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1462   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1463   A = *newmat;
1464   for (i=0; i<m; i++) {
1465     ourlens[i] += offlens[i];
1466   }
1467 
1468   if (!rank) {
1469     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1470 
1471     /* read in my part of the matrix numerical values  */
1472     nz = procsnz[0];
1473     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1474 
1475     /* insert into matrix */
1476     jj      = rstart;
1477     smycols = mycols;
1478     svals   = vals;
1479     for (i=0; i<m; i++) {
1480       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1481       smycols += ourlens[i];
1482       svals   += ourlens[i];
1483       jj++;
1484     }
1485 
1486     /* read in the other processors' parts and ship them out */
1487     for (i=1; i<size; i++) {
1488       nz   = procsnz[i];
1489       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1490       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1491     }
1492     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1493   } else {
1494     /* receive numeric values */
1495     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1496 
1497     /* receive message of values*/
1498     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1499     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1500     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1501 
1502     /* insert into matrix */
1503     jj      = rstart;
1504     smycols = mycols;
1505     svals   = vals;
1506     for (i=0; i<m; i++) {
1507       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1508       smycols += ourlens[i];
1509       svals   += ourlens[i];
1510       jj++;
1511     }
1512   }
1513   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1514   ierr = PetscFree(vals);CHKERRQ(ierr);
1515   ierr = PetscFree(mycols);CHKERRQ(ierr);
1516   ierr = PetscFree(rowners);CHKERRQ(ierr);
1517 
1518   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1519   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 
1524 
1525 
1526 
1527