xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 8bc8193efbc389280f83b3d41dffa9e2d23e2ace)
1 /*
2    Basic functions for parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetRow_MPIDense"
9 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
10 {
11   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
12   PetscErrorCode ierr;
13   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;
14 
15   PetscFunctionBegin;
16   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
17   lrow = row - rstart;
18   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
19   PetscFunctionReturn(0);
20 }
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatRestoreRow_MPIDense"
24 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
25 {
26   PetscErrorCode ierr;
27 
28   PetscFunctionBegin;
29   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
30   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
31   PetscFunctionReturn(0);
32 }
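/*
   Usage sketch (illustration only, not part of the library source): MatGetRow_MPIDense() above
   only supports locally owned rows, so callers should restrict row queries to the ownership
   range.  A is a placeholder matrix; the exact const qualification of MatGetRow()'s output
   arrays may differ in this revision (compare the cast used above).

     PetscInt    row,rstart,rend,ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... examine the ncols entries of this row ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/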
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
37 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
38 {
39   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
40   PetscErrorCode ierr;
41   PetscInt          m = A->m,rstart = mdn->rstart;
42   PetscScalar  *array;
43   MPI_Comm     comm;
44 
45   PetscFunctionBegin;
46   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
47 
48   /* The reuse aspect is not implemented efficiently */
49   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
50 
51   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
52   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
53   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
54   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
55   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
56   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
57   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
58   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
59 
60   *iscopy = PETSC_TRUE;
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatSetValues_MPIDense"
67 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
68 {
69   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
70   PetscErrorCode ierr;
71   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
72   PetscTruth   roworiented = A->roworiented;
73 
74   PetscFunctionBegin;
75   for (i=0; i<m; i++) {
76     if (idxm[i] < 0) continue;
77     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
78     if (idxm[i] >= rstart && idxm[i] < rend) {
79       row = idxm[i] - rstart;
80       if (roworiented) {
81         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
82       } else {
83         for (j=0; j<n; j++) {
84           if (idxn[j] < 0) continue;
85           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
86           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
87         }
88       }
89     } else {
90       if (!A->donotstash) {
91         if (roworiented) {
92           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
93         } else {
94           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
95         }
96       }
97     }
98   }
99   PetscFunctionReturn(0);
100 }
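/*
   Usage sketch (illustration only, not part of the library source): values whose global row is
   owned locally go straight into the local SeqDense block; rows owned elsewhere are placed in
   the stash and communicated during assembly.  A, i, j and val are placeholder names; i and j
   may be any global indices.

     PetscInt    i = 0,j = 0;
     PetscScalar val = 1.0;
     ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/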
101 
102 #undef __FUNCT__
103 #define __FUNCT__ "MatGetValues_MPIDense"
104 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
105 {
106   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
107   PetscErrorCode ierr;
108   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
109 
110   PetscFunctionBegin;
111   for (i=0; i<m; i++) {
112     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
113     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
114     if (idxm[i] >= rstart && idxm[i] < rend) {
115       row = idxm[i] - rstart;
116       for (j=0; j<n; j++) {
117         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
118         if (idxn[j] >= mat->N) {
119           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120         }
121         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
122       }
123     } else {
124       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
125     }
126   }
127   PetscFunctionReturn(0);
128 }
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatGetArray_MPIDense"
132 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
133 {
134   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
135   PetscErrorCode ierr;
136 
137   PetscFunctionBegin;
138   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
144 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
145 {
146   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
147   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
148   PetscErrorCode ierr;
149   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
150   PetscScalar  *av,*bv,*v = lmat->v;
151   Mat          newmat;
152 
153   PetscFunctionBegin;
154   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
155   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
156   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
157   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
158 
159   /* No parallel redistribution is currently supported! Each index set should really be
160      checked to confirm that it is OK. Currently the submatrix must use the same row
161      partitioning as the original matrix! */
162 
163   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
164   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
165 
166   /* Check submatrix call */
167   if (scall == MAT_REUSE_MATRIX) {
168     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
169     /* Really need to test rows and column sizes! */
170     newmat = *B;
171   } else {
172     /* Create and fill new matrix */
173     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
174     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
175     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
176   }
177 
178   /* Now extract the data pointers and do the copy, column at a time */
179   newmatd = (Mat_MPIDense*)newmat->data;
180   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
181 
182   for (i=0; i<ncols; i++) {
183     av = v + nlrows*icol[i];
184     for (j=0; j<nrows; j++) {
185       *bv++ = av[irow[j] - rstart];
186     }
187   }
188 
189   /* Assemble the matrices so that the correct flags are set */
190   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192 
193   /* Free work space */
194   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
196   *B = newmat;
197   PetscFunctionReturn(0);
198 }
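/*
   Usage sketch (illustration only, not part of the library source): extracting a submatrix that
   keeps the original row partitioning, here the first ncols columns of the locally owned rows.
   It assumes the public MatGetSubMatrix() of this revision takes the same
   (isrow,iscol,csize,reuse) arguments as the routine above; A, B, ncols and rstart,rend
   (from MatGetOwnershipRange()) are placeholders.

     IS  isrow,iscol;
     Mat B;
     ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ierr = ISDestroy(isrow);CHKERRQ(ierr);
     ierr = ISDestroy(iscol);CHKERRQ(ierr);
*/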
199 
200 #undef __FUNCT__
201 #define __FUNCT__ "MatRestoreArray_MPIDense"
202 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
203 {
204   PetscFunctionBegin;
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
210 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
211 {
212   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
213   MPI_Comm     comm = mat->comm;
214   PetscErrorCode ierr;
215   PetscInt          nstash,reallocs;
216   InsertMode   addv;
217 
218   PetscFunctionBegin;
219   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
220   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
221   if (addv == (ADD_VALUES|INSERT_VALUES)) {
222     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
223   }
224   mat->insertmode = addv; /* in case this processor had no cache */
225 
226   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
227   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
228   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
229   PetscFunctionReturn(0);
230 }
231 
232 #undef __FUNCT__
233 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
234 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
235 {
236   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
237   PetscErrorCode  ierr;
238   PetscInt        i,*row,*col,flg,j,rstart,ncols;
239   PetscMPIInt     n;
240   PetscScalar     *val;
241   InsertMode      addv=mat->insertmode;
242 
243   PetscFunctionBegin;
244   /*  wait on receives */
245   while (1) {
246     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
247     if (!flg) break;
248 
249     for (i=0; i<n;) {
250       /* Now identify the consecutive vals belonging to the same row */
251       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
252       if (j < n) ncols = j-i;
253       else       ncols = n-i;
254       /* Now assemble all these values with a single function call */
255       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
256       i = j;
257     }
258   }
259   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
260 
261   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
262   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
263 
264   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
265     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
266   }
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatZeroEntries_MPIDense"
272 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
273 {
274   PetscErrorCode ierr;
275   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
276 
277   PetscFunctionBegin;
278   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 /* the code does not handle the diagonal entries correctly unless the
283    matrix is square and the column and row ownerships are identical.
284    This is a BUG. The only way to fix it seems to be to access
285    mdn->A and mdn->B directly and not through the MatZeroRows()
286    routine.
287 */
288 #undef __FUNCT__
289 #define __FUNCT__ "MatZeroRows_MPIDense"
290 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
291 {
292   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
293   PetscErrorCode ierr;
294   PetscInt       i,N,*rows,*owners = l->rowners;
295   PetscInt       *nprocs,j,idx,nsends;
296   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
297   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
298   PetscInt       *lens,*lrows,*values;
299   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
300   MPI_Comm       comm = A->comm;
301   MPI_Request    *send_waits,*recv_waits;
302   MPI_Status     recv_status,*send_status;
303   IS             istmp;
304   PetscTruth     found;
305 
306   PetscFunctionBegin;
307   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
308   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
309 
310   /*  first count number of contributors to each processor */
311   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
312   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
313   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
314   for (i=0; i<N; i++) {
315     idx = rows[i];
316     found = PETSC_FALSE;
317     for (j=0; j<size; j++) {
318       if (idx >= owners[j] && idx < owners[j+1]) {
319         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
320       }
321     }
322     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
323   }
324   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
325 
326   /* inform other processors of number of messages and max length*/
327   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
328 
329   /* post receives:   */
330   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
331   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
332   for (i=0; i<nrecvs; i++) {
333     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
334   }
335 
336   /* do sends:
337       1) starts[i] gives the starting index in svalues for stuff going to
338          the ith processor
339   */
340   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
341   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
342   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
343   starts[0]  = 0;
344   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
345   for (i=0; i<N; i++) {
346     svalues[starts[owner[i]]++] = rows[i];
347   }
348   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
349 
350   starts[0] = 0;
351   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
352   count = 0;
353   for (i=0; i<size; i++) {
354     if (nprocs[2*i+1]) {
355       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
356     }
357   }
358   ierr = PetscFree(starts);CHKERRQ(ierr);
359 
360   base = owners[rank];
361 
362   /*  wait on receives */
363   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
364   source = lens + nrecvs;
365   count  = nrecvs; slen = 0;
366   while (count) {
367     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
368     /* unpack receives into our local space */
369     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
370     source[imdex]  = recv_status.MPI_SOURCE;
371     lens[imdex]    = n;
372     slen += n;
373     count--;
374   }
375   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
376 
377   /* move the data into the send scatter */
378   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
379   count = 0;
380   for (i=0; i<nrecvs; i++) {
381     values = rvalues + i*nmax;
382     for (j=0; j<lens[i]; j++) {
383       lrows[count++] = values[j] - base;
384     }
385   }
386   ierr = PetscFree(rvalues);CHKERRQ(ierr);
387   ierr = PetscFree(lens);CHKERRQ(ierr);
388   ierr = PetscFree(owner);CHKERRQ(ierr);
389   ierr = PetscFree(nprocs);CHKERRQ(ierr);
390 
391   /* actually zap the local rows */
392   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
393   PetscLogObjectParent(A,istmp);
394   ierr = PetscFree(lrows);CHKERRQ(ierr);
395   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
396   ierr = ISDestroy(istmp);CHKERRQ(ierr);
397 
398   /* wait on sends */
399   if (nsends) {
400     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
401     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
402     ierr = PetscFree(send_status);CHKERRQ(ierr);
403   }
404   ierr = PetscFree(send_waits);CHKERRQ(ierr);
405   ierr = PetscFree(svalues);CHKERRQ(ierr);
406 
407   PetscFunctionReturn(0);
408 }
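/*
   Usage sketch (illustration only, not part of the library source): zeroing a set of global rows
   and placing a value on the diagonal.  Each process may list rows owned by any process; the
   communication above routes them to their owners.  A, nrows and rows are placeholders.

     IS          is;
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/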
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "MatMult_MPIDense"
412 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
413 {
414   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
419   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
420   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatMultAdd_MPIDense"
426 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
427 {
428   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
429   PetscErrorCode ierr;
430 
431   PetscFunctionBegin;
432   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
433   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
434   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "MatMultTranspose_MPIDense"
440 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
443   PetscErrorCode ierr;
444   PetscScalar  zero = 0.0;
445 
446   PetscFunctionBegin;
447   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
448   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
449   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
450   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
456 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
457 {
458   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
463   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
464   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
465   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
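/*
   Usage sketch (illustration only, not part of the library source): the multiply routines expect
   x to be distributed like the columns and y like the rows of the matrix.  A is a placeholder
   with local sizes m,n and global sizes M,N; the communicator is assumed to be PETSC_COMM_WORLD.

     Vec x,y;
     ierr = VecCreateMPI(PETSC_COMM_WORLD,n,N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);               y = A*x
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);      x = A'*y
     ierr = VecDestroy(x);CHKERRQ(ierr);
     ierr = VecDestroy(y);CHKERRQ(ierr);
*/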
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatGetDiagonal_MPIDense"
471 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
472 {
473   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
474   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
475   PetscErrorCode ierr;
476   PetscInt          len,i,n,m = A->m,radd;
477   PetscScalar  *x,zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(&zero,v);CHKERRQ(ierr);
481   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
482   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
483   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
484   len  = PetscMin(a->A->m,a->A->n);
485   radd = a->rstart*m;
486   for (i=0; i<len; i++) {
487     x[i] = aloc->v[radd + i*m + i];
488   }
489   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
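/*
   Usage sketch (illustration only, not part of the library source): extracting the diagonal of a
   square matrix into a vector distributed like its rows.  A, m and M are placeholders.

     Vec d;
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,M,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(d);CHKERRQ(ierr);
*/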
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatDestroy_MPIDense"
495 PetscErrorCode MatDestroy_MPIDense(Mat mat)
496 {
497   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498   PetscErrorCode ierr;
499 
500   PetscFunctionBegin;
501 
502 #if defined(PETSC_USE_LOG)
503   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
504 #endif
505   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
506   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
507   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
508   if (mdn->lvec)   VecDestroy(mdn->lvec);
509   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
510   if (mdn->factor) {
511     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
512     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
513     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
514     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
515   }
516   ierr = PetscFree(mdn);CHKERRQ(ierr);
517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatView_MPIDense_Binary"
524 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
525 {
526   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
527   PetscErrorCode ierr;
528 
529   PetscFunctionBegin;
530   if (mdn->size == 1) {
531     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
532   }
533   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
539 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
540 {
541   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
542   PetscErrorCode    ierr;
543   PetscMPIInt       size = mdn->size,rank = mdn->rank;
544   PetscViewerType   vtype;
545   PetscTruth        iascii,isdraw;
546   PetscViewer       sviewer;
547   PetscViewerFormat format;
548 
549   PetscFunctionBegin;
550   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
551   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
552   if (iascii) {
553     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
554     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
555     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
556       MatInfo info;
557       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
558       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
559                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
560       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
561       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
562       PetscFunctionReturn(0);
563     } else if (format == PETSC_VIEWER_ASCII_INFO) {
564       PetscFunctionReturn(0);
565     }
566   } else if (isdraw) {
567     PetscDraw       draw;
568     PetscTruth isnull;
569 
570     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
571     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
572     if (isnull) PetscFunctionReturn(0);
573   }
574 
575   if (size == 1) {
576     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
577   } else {
578     /* assemble the entire matrix onto first processor. */
579     Mat         A;
580     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
581     PetscInt    *cols;
582     PetscScalar *vals;
583 
584     if (!rank) {
585       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
586     } else {
587       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
588     }
589     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
590     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
591     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
592     PetscLogObjectParent(mat,A);
593 
594     /* Copy the matrix ... This isn't the most efficient means,
595        but it's quick for now */
596     A->insertmode = INSERT_VALUES;
597     row = mdn->rstart; m = mdn->A->m;
598     for (i=0; i<m; i++) {
599       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
600       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
601       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
602       row++;
603     }
604 
605     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
606     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
607     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
608     if (!rank) {
609       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
610     }
611     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
612     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
613     ierr = MatDestroy(A);CHKERRQ(ierr);
614   }
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNCT__
619 #define __FUNCT__ "MatView_MPIDense"
620 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
621 {
622   PetscErrorCode ierr;
623   PetscTruth iascii,isbinary,isdraw,issocket;
624 
625   PetscFunctionBegin;
626 
627   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
628   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
629   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
630   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
631 
632   if (iascii || issocket || isdraw) {
633     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
634   } else if (isbinary) {
635     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
636   } else {
637     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
638   }
639   PetscFunctionReturn(0);
640 }
641 
642 #undef __FUNCT__
643 #define __FUNCT__ "MatGetInfo_MPIDense"
644 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
645 {
646   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
647   Mat          mdn = mat->A;
648   PetscErrorCode ierr;
649   PetscReal    isend[5],irecv[5];
650 
651   PetscFunctionBegin;
652   info->rows_global    = (double)A->M;
653   info->columns_global = (double)A->N;
654   info->rows_local     = (double)A->m;
655   info->columns_local  = (double)A->N;
656   info->block_size     = 1.0;
657   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
658   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
659   isend[3] = info->memory;  isend[4] = info->mallocs;
660   if (flag == MAT_LOCAL) {
661     info->nz_used      = isend[0];
662     info->nz_allocated = isend[1];
663     info->nz_unneeded  = isend[2];
664     info->memory       = isend[3];
665     info->mallocs      = isend[4];
666   } else if (flag == MAT_GLOBAL_MAX) {
667     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
668     info->nz_used      = irecv[0];
669     info->nz_allocated = irecv[1];
670     info->nz_unneeded  = irecv[2];
671     info->memory       = irecv[3];
672     info->mallocs      = irecv[4];
673   } else if (flag == MAT_GLOBAL_SUM) {
674     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
675     info->nz_used      = irecv[0];
676     info->nz_allocated = irecv[1];
677     info->nz_unneeded  = irecv[2];
678     info->memory       = irecv[3];
679     info->mallocs      = irecv[4];
680   }
681   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
682   info->fill_ratio_needed = 0;
683   info->factor_mallocs    = 0;
684   PetscFunctionReturn(0);
685 }
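/*
   Usage sketch (illustration only, not part of the library source): querying matrix statistics;
   for a dense matrix nz_used simply reflects the full local storage.  A is a placeholder.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros %g memory %g\n",info.nz_used,info.memory);CHKERRQ(ierr);
*/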
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatSetOption_MPIDense"
689 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
690 {
691   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
692   PetscErrorCode ierr;
693 
694   PetscFunctionBegin;
695   switch (op) {
696   case MAT_NO_NEW_NONZERO_LOCATIONS:
697   case MAT_YES_NEW_NONZERO_LOCATIONS:
698   case MAT_NEW_NONZERO_LOCATION_ERR:
699   case MAT_NEW_NONZERO_ALLOCATION_ERR:
700   case MAT_COLUMNS_SORTED:
701   case MAT_COLUMNS_UNSORTED:
702     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
703     break;
704   case MAT_ROW_ORIENTED:
705     a->roworiented = PETSC_TRUE;
706     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
707     break;
708   case MAT_ROWS_SORTED:
709   case MAT_ROWS_UNSORTED:
710   case MAT_YES_NEW_DIAGONALS:
711   case MAT_USE_HASH_TABLE:
712     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
713     break;
714   case MAT_COLUMN_ORIENTED:
715     a->roworiented = PETSC_FALSE;
716     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
717     break;
718   case MAT_IGNORE_OFF_PROC_ENTRIES:
719     a->donotstash = PETSC_TRUE;
720     break;
721   case MAT_NO_NEW_DIAGONALS:
722     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
723   case MAT_SYMMETRIC:
724   case MAT_STRUCTURALLY_SYMMETRIC:
725   case MAT_NOT_SYMMETRIC:
726   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
727   case MAT_HERMITIAN:
728   case MAT_NOT_HERMITIAN:
729   case MAT_SYMMETRY_ETERNAL:
730   case MAT_NOT_SYMMETRY_ETERNAL:
731     break;
732   default:
733     SETERRQ(PETSC_ERR_SUP,"unknown option");
734   }
735   PetscFunctionReturn(0);
736 }
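/*
   Usage sketch (illustration only, not part of the library source): two options commonly used
   with this type.  MAT_COLUMN_ORIENTED makes MatSetValues() read the value array column by
   column (matching the Fortran-style storage), and MAT_IGNORE_OFF_PROC_ENTRIES disables the
   stash.  A is a placeholder.

     ierr = MatSetOption(A,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/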
737 
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatDiagonalScale_MPIDense"
741 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
742 {
743   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
744   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
745   PetscScalar  *l,*r,x,*v;
746   PetscErrorCode ierr;
747   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
748 
749   PetscFunctionBegin;
750   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
751   if (ll) {
752     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
753     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
754     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
755     for (i=0; i<m; i++) {
756       x = l[i];
757       v = mat->v + i;
758       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
759     }
760     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
761     PetscLogFlops(n*m);
762   }
763   if (rr) {
764     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
765     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
766     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
767     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
768     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
769     for (i=0; i<n; i++) {
770       x = r[i];
771       v = mat->v + i*m;
772       for (j=0; j<m; j++) { (*v++) *= x;}
773     }
774     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
775     PetscLogFlops(n*m);
776   }
777   PetscFunctionReturn(0);
778 }
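/*
   Usage sketch (illustration only, not part of the library source): scaling A <- diag(ll)*A*diag(rr).
   ll must be distributed like the rows and rr like the columns; A, m, n, M, N are placeholders and
   two/half are arbitrary scale factors.

     Vec         ll,rr;
     PetscScalar two = 2.0,half = 0.5;
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,M,&ll);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,n,N,&rr);CHKERRQ(ierr);
     ierr = VecSet(&two,ll);CHKERRQ(ierr);
     ierr = VecSet(&half,rr);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/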
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatNorm_MPIDense"
782 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
783 {
784   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
785   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
786   PetscErrorCode ierr;
787   PetscInt          i,j;
788   PetscReal    sum = 0.0;
789   PetscScalar  *v = mat->v;
790 
791   PetscFunctionBegin;
792   if (mdn->size == 1) {
793     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
794   } else {
795     if (type == NORM_FROBENIUS) {
796       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
797 #if defined(PETSC_USE_COMPLEX)
798         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
799 #else
800         sum += (*v)*(*v); v++;
801 #endif
802       }
803       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
804       *nrm = sqrt(*nrm);
805       PetscLogFlops(2*mdn->A->n*mdn->A->m);
806     } else if (type == NORM_1) {
807       PetscReal *tmp,*tmp2;
808       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
809       tmp2 = tmp + A->N;
810       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
811       *nrm = 0.0;
812       v = mat->v;
813       for (j=0; j<mdn->A->n; j++) {
814         for (i=0; i<mdn->A->m; i++) {
815           tmp[j] += PetscAbsScalar(*v);  v++;
816         }
817       }
818       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
819       for (j=0; j<A->N; j++) {
820         if (tmp2[j] > *nrm) *nrm = tmp2[j];
821       }
822       ierr = PetscFree(tmp);CHKERRQ(ierr);
823       PetscLogFlops(A->n*A->m);
824     } else if (type == NORM_INFINITY) { /* max row norm */
825       PetscReal ntemp;
826       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
827       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
828     } else {
829       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
830     }
831   }
832   PetscFunctionReturn(0);
833 }
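/*
   Usage sketch (illustration only, not part of the library source): the norms supported by this
   routine are NORM_1, NORM_FROBENIUS and NORM_INFINITY.  A is a placeholder.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/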
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatTranspose_MPIDense"
837 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
838 {
839   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
840   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
841   Mat          B;
842   PetscInt          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
843   PetscErrorCode ierr;
844   PetscInt          j,i;
845   PetscScalar  *v;
846 
847   PetscFunctionBegin;
848   if (!matout && M != N) {
849     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
850   }
851   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
852   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
853   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
854 
855   m = a->A->m; n = a->A->n; v = Aloc->v;
856   ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
857   for (j=0; j<n; j++) {
858     for (i=0; i<m; i++) rwork[i] = rstart + i;
859     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
860     v   += m;
861   }
862   ierr = PetscFree(rwork);CHKERRQ(ierr);
863   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
864   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
865   if (matout) {
866     *matout = B;
867   } else {
868     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
869   }
870   PetscFunctionReturn(0);
871 }
872 
873 #include "petscblaslapack.h"
874 #undef __FUNCT__
875 #define __FUNCT__ "MatScale_MPIDense"
876 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
877 {
878   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
879   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
880   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
881 
882   PetscFunctionBegin;
883   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
884   PetscLogFlops(nz);
885   PetscFunctionReturn(0);
886 }
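/*
   Usage sketch (illustration only, not part of the library source): in this PETSc revision
   MatScale() takes the scalar by pointer, as in the routine above.  A is a placeholder.

     PetscScalar alpha = 2.0;
     ierr = MatScale(&alpha,A);CHKERRQ(ierr);
*/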
887 
888 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
892 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
893 {
894   PetscErrorCode ierr;
895 
896   PetscFunctionBegin;
897   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 /* -------------------------------------------------------------------*/
902 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
903        MatGetRow_MPIDense,
904        MatRestoreRow_MPIDense,
905        MatMult_MPIDense,
906 /* 4*/ MatMultAdd_MPIDense,
907        MatMultTranspose_MPIDense,
908        MatMultTransposeAdd_MPIDense,
909        0,
910        0,
911        0,
912 /*10*/ 0,
913        0,
914        0,
915        0,
916        MatTranspose_MPIDense,
917 /*15*/ MatGetInfo_MPIDense,
918        0,
919        MatGetDiagonal_MPIDense,
920        MatDiagonalScale_MPIDense,
921        MatNorm_MPIDense,
922 /*20*/ MatAssemblyBegin_MPIDense,
923        MatAssemblyEnd_MPIDense,
924        0,
925        MatSetOption_MPIDense,
926        MatZeroEntries_MPIDense,
927 /*25*/ MatZeroRows_MPIDense,
928        0,
929        0,
930        0,
931        0,
932 /*30*/ MatSetUpPreallocation_MPIDense,
933        0,
934        0,
935        MatGetArray_MPIDense,
936        MatRestoreArray_MPIDense,
937 /*35*/ MatDuplicate_MPIDense,
938        0,
939        0,
940        0,
941        0,
942 /*40*/ 0,
943        MatGetSubMatrices_MPIDense,
944        0,
945        MatGetValues_MPIDense,
946        0,
947 /*45*/ 0,
948        MatScale_MPIDense,
949        0,
950        0,
951        0,
952 /*50*/ 0,
953        0,
954        0,
955        0,
956        0,
957 /*55*/ 0,
958        0,
959        0,
960        0,
961        0,
962 /*60*/ MatGetSubMatrix_MPIDense,
963        MatDestroy_MPIDense,
964        MatView_MPIDense,
965        MatGetPetscMaps_Petsc,
966        0,
967 /*65*/ 0,
968        0,
969        0,
970        0,
971        0,
972 /*70*/ 0,
973        0,
974        0,
975        0,
976        0,
977 /*75*/ 0,
978        0,
979        0,
980        0,
981        0,
982 /*80*/ 0,
983        0,
984        0,
985        0,
986 /*84*/ MatLoad_MPIDense,
987        0,
988        0,
989        0,
990        0,
991        0,
992 /*90*/ 0,
993        0,
994        0,
995        0,
996        0,
997 /*95*/ 0,
998        0,
999        0,
1000        0};
1001 
1002 EXTERN_C_BEGIN
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1005 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1006 {
1007   Mat_MPIDense *a;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   mat->preallocated = PETSC_TRUE;
1012   /* Note: for now, when data is specified, this assumes that the user correctly
1013      allocates the local dense storage space.  We should add error checking. */
1014 
1015   a    = (Mat_MPIDense*)mat->data;
1016   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1017   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1018   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1019   PetscLogObjectParent(mat,a->A);
1020   PetscFunctionReturn(0);
1021 }
1022 EXTERN_C_END
1023 
1024 /*MC
1025    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1026 
1027    Options Database Keys:
1028 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1029 
1030   Level: beginner
1031 
1032 .seealso: MatCreateMPIDense
1033 M*/
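/*
   Usage sketch (illustration only, not part of the library source): selecting this type from the
   options database.  M and N are placeholder global sizes; run the program with -mat_type mpidense.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/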
1034 
1035 EXTERN_C_BEGIN
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatCreate_MPIDense"
1038 PetscErrorCode MatCreate_MPIDense(Mat mat)
1039 {
1040   Mat_MPIDense *a;
1041   PetscErrorCode ierr;
1042   PetscInt          i;
1043 
1044   PetscFunctionBegin;
1045   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1046   mat->data         = (void*)a;
1047   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1048   mat->factor       = 0;
1049   mat->mapping      = 0;
1050 
1051   a->factor       = 0;
1052   mat->insertmode = NOT_SET_VALUES;
1053   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1054   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1055 
1056   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1057   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1058   a->nvec = mat->n;
1059 
1060   /* the information in the maps duplicates the information computed below, eventually
1061      we should remove the duplicate information that is not contained in the maps */
1062   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1063   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1064 
1065   /* build local table of row and column ownerships */
1066   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1067   a->cowners = a->rowners + a->size + 1;
1068   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1069   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1070   a->rowners[0] = 0;
1071   for (i=2; i<=a->size; i++) {
1072     a->rowners[i] += a->rowners[i-1];
1073   }
1074   a->rstart = a->rowners[a->rank];
1075   a->rend   = a->rowners[a->rank+1];
1076   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1077   a->cowners[0] = 0;
1078   for (i=2; i<=a->size; i++) {
1079     a->cowners[i] += a->cowners[i-1];
1080   }
1081 
1082   /* build a cache for off-processor entries */
1083   a->donotstash = PETSC_FALSE;
1084   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1085 
1086   /* work objects used for matrix-vector multiply */
1087   a->lvec        = 0;
1088   a->Mvctx       = 0;
1089   a->roworiented = PETSC_TRUE;
1090 
1091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1092                                      "MatGetDiagonalBlock_MPIDense",
1093                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1095                                      "MatMPIDenseSetPreallocation_MPIDense",
1096                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 EXTERN_C_END
1100 
1101 /*MC
1102    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1103 
1104    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1105    and MATMPIDENSE otherwise.
1106 
1107    Options Database Keys:
1108 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1109 
1110   Level: beginner
1111 
1112 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1113 M*/
1114 
1115 EXTERN_C_BEGIN
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatCreate_Dense"
1118 PetscErrorCode MatCreate_Dense(Mat A)
1119 {
1120   PetscErrorCode ierr;
1121   PetscMPIInt    size;
1122 
1123   PetscFunctionBegin;
1124   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1125   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1126   if (size == 1) {
1127     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1128   } else {
1129     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1130   }
1131   PetscFunctionReturn(0);
1132 }
1133 EXTERN_C_END
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1137 /*@C
1138    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1139 
1140    Not collective
1141 
1142    Input Parameters:
1143 +  A - the matrix
1144 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1145    to control all matrix memory allocation.
1146 
1147    Notes:
1148    The dense format is fully compatible with standard Fortran 77
1149    storage by columns.
1150 
1151    The data input variable is intended primarily for Fortran programmers
1152    who wish to allocate their own matrix memory space.  Most users should
1153    set data=PETSC_NULL.
1154 
1155    Level: intermediate
1156 
1157 .keywords: matrix,dense, parallel
1158 
1159 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1160 @*/
1161 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1162 {
1163   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1164 
1165   PetscFunctionBegin;
1166   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1167   if (f) {
1168     ierr = (*f)(mat,data);CHKERRQ(ierr);
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatCreateMPIDense"
1175 /*@C
1176    MatCreateMPIDense - Creates a parallel matrix in dense format.
1177 
1178    Collective on MPI_Comm
1179 
1180    Input Parameters:
1181 +  comm - MPI communicator
1182 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1183 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1184 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1185 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1186 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1187    to control all matrix memory allocation.
1188 
1189    Output Parameter:
1190 .  A - the matrix
1191 
1192    Notes:
1193    The dense format is fully compatible with standard Fortran 77
1194    storage by columns.
1195 
1196    The data input variable is intended primarily for Fortran programmers
1197    who wish to allocate their own matrix memory space.  Most users should
1198    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1199 
1200    The user MUST specify either the local or global matrix dimensions
1201    (possibly both).
1202 
1203    Level: intermediate
1204 
1205 .keywords: matrix,dense, parallel
1206 
1207 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1208 @*/
1209 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1210 {
1211   PetscErrorCode ierr;
1212   PetscMPIInt    size;
1213 
1214   PetscFunctionBegin;
1215   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1216   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1217   if (size > 1) {
1218     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1219     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1220   } else {
1221     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1222     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1223   }
1224   PetscFunctionReturn(0);
1225 }
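/*
   Usage sketch (illustration only, not part of the library source): creating a distributed dense
   matrix with PETSc-managed storage, letting PETSc choose the row distribution.  M and N are
   placeholder global sizes.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ... set values, assemble, use ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/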
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatDuplicate_MPIDense"
1229 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1230 {
1231   Mat          mat;
1232   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   *newmat       = 0;
1237   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1238   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1239   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1240   mat->data         = (void*)a;
1241   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1242   mat->factor       = A->factor;
1243   mat->assembled    = PETSC_TRUE;
1244   mat->preallocated = PETSC_TRUE;
1245 
1246   a->rstart       = oldmat->rstart;
1247   a->rend         = oldmat->rend;
1248   a->size         = oldmat->size;
1249   a->rank         = oldmat->rank;
1250   mat->insertmode = NOT_SET_VALUES;
1251   a->nvec         = oldmat->nvec;
1252   a->donotstash   = oldmat->donotstash;
1253   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1254   PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1255   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1256   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1257 
1258   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1259   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1260   PetscLogObjectParent(mat,a->A);
1261   *newmat = mat;
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #include "petscsys.h"
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1269 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
1270 {
1271   PetscErrorCode ierr;
1272   PetscMPIInt    rank,size;
1273   PetscInt            *rowners,i,m,nz,j;
1274   PetscScalar    *array,*vals,*vals_ptr;
1275   MPI_Status     status;
1276 
1277   PetscFunctionBegin;
1278   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1279   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1280 
1281   /* determine ownership of all rows */
1282   m          = M/size + ((M % size) > rank);
1283   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1284   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1285   rowners[0] = 0;
1286   for (i=2; i<=size; i++) {
1287     rowners[i] += rowners[i-1];
1288   }
1289 
1290   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1291   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1292   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1293   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1294 
1295   if (!rank) {
1296     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1297 
1298     /* read in my part of the matrix numerical values  */
1299     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1300 
1301     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1302     vals_ptr = vals;
1303     for (i=0; i<m; i++) {
1304       for (j=0; j<N; j++) {
1305         array[i + j*m] = *vals_ptr++;
1306       }
1307     }
1308 
1309     /* read in the other processors' parts and ship them out */
1310     for (i=1; i<size; i++) {
1311       nz   = (rowners[i+1] - rowners[i])*N;
1312       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1313       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1314     }
1315   } else {
1316     /* receive numeric values */
1317     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1318 
1319     /* receive message of values*/
1320     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1321 
1322     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1323     vals_ptr = vals;
1324     for (i=0; i<m; i++) {
1325       for (j=0; j<N; j++) {
1326         array[i + j*m] = *vals_ptr++;
1327       }
1328     }
1329   }
1330   ierr = PetscFree(rowners);CHKERRQ(ierr);
1331   ierr = PetscFree(vals);CHKERRQ(ierr);
1332   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1333   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatLoad_MPIDense"
1339 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1340 {
1341   Mat            A;
1342   PetscScalar    *vals,*svals;
1343   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1344   MPI_Status     status;
1345   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1346   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1347   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1348   PetscInt       i,nz,j,rstart,rend;
1349   int            fd;
1350   PetscErrorCode ierr;
1351 
1352   PetscFunctionBegin;
1353   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1354   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1355   if (!rank) {
1356     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1357     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1358     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1359   }
1360 
1361   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1362   M = header[1]; N = header[2]; nz = header[3];
1363 
1364   /*
1365        Handle case where matrix is stored on disk as a dense matrix
1366   */
1367   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1368     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1369     PetscFunctionReturn(0);
1370   }
1371 
1372   /* determine ownership of all rows */
1373   m          = M/size + ((M % size) > rank);
1374   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1375   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1376   rowners[0] = 0;
1377   for (i=2; i<=size; i++) {
1378     rowners[i] += rowners[i-1];
1379   }
1380   rstart = rowners[rank];
1381   rend   = rowners[rank+1];
1382 
1383   /* distribute row lengths to all processors */
1384   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1385   offlens = ourlens + (rend-rstart);
1386   if (!rank) {
1387     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1388     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1389     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1390     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1391     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1392     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1393   } else {
1394     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1395   }
1396 
1397   if (!rank) {
1398     /* calculate the number of nonzeros on each processor */
1399     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1400     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1401     for (i=0; i<size; i++) {
1402       for (j=rowners[i]; j< rowners[i+1]; j++) {
1403         procsnz[i] += rowlengths[j];
1404       }
1405     }
1406     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1407 
1408     /* determine max buffer needed and allocate it */
1409     maxnz = 0;
1410     for (i=0; i<size; i++) {
1411       maxnz = PetscMax(maxnz,procsnz[i]);
1412     }
1413     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1414 
1415     /* read in my part of the matrix column indices  */
1416     nz = procsnz[0];
1417     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1418     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1419 
1420     /* read in everyone else's part and ship it off */
1421     for (i=1; i<size; i++) {
1422       nz   = procsnz[i];
1423       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1424       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1425     }
1426     ierr = PetscFree(cols);CHKERRQ(ierr);
1427   } else {
1428     /* determine buffer space needed for message */
1429     nz = 0;
1430     for (i=0; i<m; i++) {
1431       nz += ourlens[i];
1432     }
1433     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1434 
1435     /* receive message of column indices*/
1436     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1437     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1438     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1439   }
1440 
1441   /* loop over local rows, determining number of off diagonal entries */
1442   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1443   jj = 0;
1444   for (i=0; i<m; i++) {
1445     for (j=0; j<ourlens[i]; j++) {
1446       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1447       jj++;
1448     }
1449   }
1450 
1451   /* create our matrix */
1452   for (i=0; i<m; i++) {
1453     ourlens[i] -= offlens[i];
1454   }
1455   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1456   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1457   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1458   A = *newmat;
1459   for (i=0; i<m; i++) {
1460     ourlens[i] += offlens[i];
1461   }
1462 
1463   if (!rank) {
1464     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1465 
1466     /* read in my part of the matrix numerical values  */
1467     nz = procsnz[0];
1468     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1469 
1470     /* insert into matrix */
1471     jj      = rstart;
1472     smycols = mycols;
1473     svals   = vals;
1474     for (i=0; i<m; i++) {
1475       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1476       smycols += ourlens[i];
1477       svals   += ourlens[i];
1478       jj++;
1479     }
1480 
1481     /* read in the other processors' parts and ship them out */
1482     for (i=1; i<size; i++) {
1483       nz   = procsnz[i];
1484       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1485       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1486     }
1487     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1488   } else {
1489     /* receive numeric values */
1490     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1491 
1492     /* receive message of values*/
1493     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1494     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1495     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1496 
1497     /* insert into matrix */
1498     jj      = rstart;
1499     smycols = mycols;
1500     svals   = vals;
1501     for (i=0; i<m; i++) {
1502       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1503       smycols += ourlens[i];
1504       svals   += ourlens[i];
1505       jj++;
1506     }
1507   }
1508   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1509   ierr = PetscFree(vals);CHKERRQ(ierr);
1510   ierr = PetscFree(mycols);CHKERRQ(ierr);
1511   ierr = PetscFree(rowners);CHKERRQ(ierr);
1512 
1513   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1514   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 
1519 
1520 
1521 
1522