xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 321af1c700e5f4ecf330ccd829080dfec8601628)
1 /*
2    Basic functions for basic parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   MatGetDiagonalBlock_MPIDense - Returns the on-process diagonal block of a
   square parallel dense matrix as a sequential dense matrix.

   The local block mdn->A is m x N, stored column major with leading
   dimension m, so the columns of the diagonal block (global columns
   rstart .. rstart+m-1) begin at array + m*rstart.  The returned matrix *B
   shares that storage (no values are copied into it).

   NOTE(review): *iscopy is set PETSC_TRUE even though the storage is shared
   with mdn->A -- presumably this tells the caller it owns *B and must
   destroy it; confirm against MatGetDiagonalBlock() callers.
*/
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  int          m = A->m,rstart = mdn->rstart;
  PetscScalar  *array;
  MPI_Comm     comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  /* Build *B on the subcommunicator of the local block (typically PETSC_COMM_SELF) */
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  /* point *B at the diagonal block inside the local array (shared storage) */
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
36 EXTERN_C_END
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
/*
   MatSetValues_MPIDense - Inserts or adds a logically dense block of values.

   Rows owned by this process (rstart <= idxm[i] < rend) are forwarded
   directly to the underlying sequential matrix; off-process rows are cached
   in the matrix stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and
   communicated during assembly.

   v is interpreted row major when roworiented (row i occupies
   v[i*n .. i*n+n-1]), column major otherwise (entry (i,j) at v[i + j*m]).

   NOTE(review): the column bound against mat->N is only checked in the
   column-oriented branch; the row-oriented branch relies on the inner
   MatSetValues() to validate columns -- confirm this asymmetry is intended.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  int          i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth   roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped by convention */
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* locally owned row: set directly into the sequential block */
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: stash for communication at assembly time */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatGetValues_MPIDense"
77 PetscErrorCode MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
78 {
79   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
80   PetscErrorCode ierr;
81   int          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
/*
   MatGetSubMatrix_MPIDense - Extracts a submatrix defined by the index sets
   isrow/iscol into a new (or reused) parallel dense matrix.

   Only works when the submatrix keeps the same row partitioning as the
   original matrix: irow[] must contain indices owned by this process, and
   icol[] global column indices (every column is stored locally).  The copy
   reads the local storage directly (column major, leading dimension
   nlrows).

   cs - requested local column size of the new matrix (used only when a new
        matrix is created)
*/
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  int          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar  *av,*bv,*v = lmat->v;
  Mat          newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    /* av points at column icol[i] of the local block (lda = nlrows);
       irow[j]-rstart converts the global row index to a local offset */
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatRestoreArray_MPIDense"
/*
   MatRestoreArray_MPIDense - Companion to MatGetArray_MPIDense.

   Intentionally a no-op: MatGetArray_MPIDense hands out a pointer directly
   into the matrix storage, so there is nothing to copy back or free here.
*/
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
/*
   MatAssemblyBegin_MPIDense - Begins communicating stashed off-process
   entries to their owners.

   First verifies that all processes are using the same InsertMode: the
   bitwise-OR reduction of the modes equals ADD_VALUES|INSERT_VALUES exactly
   when both modes are in use somewhere, which is an error.
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm     comm = mat->comm;
  PetscErrorCode ierr;
  int          nstash,reallocs;
  InsertMode   addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start sending stashed values; rowners gives the row ownership ranges */
  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   MatAssemblyEnd_MPIDense - Receives stashed off-process entries, inserts
   them locally, then assembles the underlying sequential matrix.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  int          i,n,*row,*col,flg,j,rstart,ncols;
  PetscScalar  *val;
  InsertMode   addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    /* flg is 0 once all messages have been received */
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      /* both branches yield j-i; kept as-is from the original */
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly also sets up the scatter used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatZeroEntries_MPIDense"
244 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
245 {
246   PetscErrorCode ierr;
247   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatGetBlockSize_MPIDense"
/*
   MatGetBlockSize_MPIDense - Dense matrices have no block structure, so the
   block size is always 1.
*/
PetscErrorCode MatGetBlockSize_MPIDense(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
262 
263 /* the code does not do the diagonal entries correctly unless the
264    matrix is square and the column and row owerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    mdn->A and mdn->B directly and not through the MatZeroRows()
267    routine.
268 */
269 #undef __FUNCT__
270 #define __FUNCT__ "MatZeroRows_MPIDense"
271 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
274   PetscErrorCode ierr;
275   int            i,N,*rows,*owners = l->rowners,size = l->size;
276   int            *nprocs,j,idx,nsends;
277   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
278   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
279   int            *lens,imdex,*lrows,*values;
280   MPI_Comm       comm = A->comm;
281   MPI_Request    *send_waits,*recv_waits;
282   MPI_Status     recv_status,*send_status;
283   IS             istmp;
284   PetscTruth     found;
285 
286   PetscFunctionBegin;
287   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
288   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
289 
290   /*  first count number of contributors to each processor */
291   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
292   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
293   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
294   for (i=0; i<N; i++) {
295     idx = rows[i];
296     found = PETSC_FALSE;
297     for (j=0; j<size; j++) {
298       if (idx >= owners[j] && idx < owners[j+1]) {
299         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
300       }
301     }
302     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
303   }
304   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
305 
306   /* inform other processors of number of messages and max length*/
307   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
308 
309   /* post receives:   */
310   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
311   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
312   for (i=0; i<nrecvs; i++) {
313     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
314   }
315 
316   /* do sends:
317       1) starts[i] gives the starting index in svalues for stuff going to
318          the ith processor
319   */
320   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
321   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
322   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
323   starts[0]  = 0;
324   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
325   for (i=0; i<N; i++) {
326     svalues[starts[owner[i]]++] = rows[i];
327   }
328   ISRestoreIndices(is,&rows);
329 
330   starts[0] = 0;
331   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
332   count = 0;
333   for (i=0; i<size; i++) {
334     if (nprocs[2*i+1]) {
335       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
336     }
337   }
338   ierr = PetscFree(starts);CHKERRQ(ierr);
339 
340   base = owners[rank];
341 
342   /*  wait on receives */
343   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
344   source = lens + nrecvs;
345   count  = nrecvs; slen = 0;
346   while (count) {
347     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
348     /* unpack receives into our local space */
349     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
350     source[imdex]  = recv_status.MPI_SOURCE;
351     lens[imdex]  = n;
352     slen += n;
353     count--;
354   }
355   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
356 
357   /* move the data into the send scatter */
358   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
359   count = 0;
360   for (i=0; i<nrecvs; i++) {
361     values = rvalues + i*nmax;
362     for (j=0; j<lens[i]; j++) {
363       lrows[count++] = values[j] - base;
364     }
365   }
366   ierr = PetscFree(rvalues);CHKERRQ(ierr);
367   ierr = PetscFree(lens);CHKERRQ(ierr);
368   ierr = PetscFree(owner);CHKERRQ(ierr);
369   ierr = PetscFree(nprocs);CHKERRQ(ierr);
370 
371   /* actually zap the local rows */
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
373   PetscLogObjectParent(A,istmp);
374   ierr = PetscFree(lrows);CHKERRQ(ierr);
375   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
376   ierr = ISDestroy(istmp);CHKERRQ(ierr);
377 
378   /* wait on sends */
379   if (nsends) {
380     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
381     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382     ierr = PetscFree(send_status);CHKERRQ(ierr);
383   }
384   ierr = PetscFree(send_waits);CHKERRQ(ierr);
385   ierr = PetscFree(svalues);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatMult_MPIDense"
/*
   MatMult_MPIDense - yy = mat * xx.

   Each process stores a full m x N row block, so the entire input vector is
   first gathered into the local work vector lvec; the product is then a
   purely local dense multiply.
*/
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatMultAdd_MPIDense"
/*
   MatMultAdd_MPIDense - zz = yy + mat * xx.

   Same structure as MatMult_MPIDense: gather all of xx into lvec, then do
   the local dense multiply-add.
*/
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatMultTranspose_MPIDense"
/*
   MatMultTranspose_MPIDense - yy = A^T * xx.

   The local transpose-multiply produces a contribution to every global
   entry of the result (the local block spans all N columns); these partial
   results are summed into yy by a reverse scatter with ADD_VALUES, so yy
   must be zeroed first.
*/
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar  zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,yy);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
/*
   MatMultTransposeAdd_MPIDense - zz = yy + A^T * xx.

   Seeds zz with yy, then adds the scattered local transpose-multiply
   contributions, mirroring MatMultTranspose_MPIDense.
*/
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatGetDiagonal_MPIDense"
/*
   MatGetDiagonal_MPIDense - Extracts the diagonal into v.

   The local block is m x N, column major with leading dimension m, so the
   global diagonal entry (rstart+i, rstart+i) sits at
   v[(rstart+i)*m + i] = v[rstart*m + i*m + i] -- hence radd = rstart*m.
   The remainder of v is zeroed first.
*/
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  int          len,i,n,m = A->m,radd;
  PetscScalar  *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(&zero,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  /* v must conform to the (square) matrix in global size */
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  /* number of diagonal entries in the local row block */
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatDestroy_MPIDense"
475 PetscErrorCode MatDestroy_MPIDense(Mat mat)
476 {
477   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481 
482 #if defined(PETSC_USE_LOG)
483   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
484 #endif
485   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
486   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
487   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
488   if (mdn->lvec)   VecDestroy(mdn->lvec);
489   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
490   if (mdn->factor) {
491     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
492     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
493     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
494     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
495   }
496   ierr = PetscFree(mdn);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatView_MPIDense_Binary"
502 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
503 {
504   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   if (mdn->size == 1) {
509     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
510   }
511   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNCT__
516 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
517 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
518 {
519   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
520   PetscErrorCode    ierr;
521   PetscMPIInt       size = mdn->size,rank = mdn->rank;
522   PetscViewerType   vtype;
523   PetscTruth        iascii,isdraw;
524   PetscViewer       sviewer;
525   PetscViewerFormat format;
526 
527   PetscFunctionBegin;
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
530   if (iascii) {
531     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
532     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
533     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
534       MatInfo info;
535       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
536       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
537                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
538       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
539       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
540       PetscFunctionReturn(0);
541     } else if (format == PETSC_VIEWER_ASCII_INFO) {
542       PetscFunctionReturn(0);
543     }
544   } else if (isdraw) {
545     PetscDraw       draw;
546     PetscTruth isnull;
547 
548     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
549     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
550     if (isnull) PetscFunctionReturn(0);
551   }
552 
553   if (size == 1) {
554     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
555   } else {
556     /* assemble the entire matrix onto first processor. */
557     Mat               A;
558     int               M = mat->M,N = mat->N,m,row,i,nz;
559     const int         *cols;
560     const PetscScalar *vals;
561 
562     if (!rank) {
563       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
564     } else {
565       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
566     }
567     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
568     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
569     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
570     PetscLogObjectParent(mat,A);
571 
572     /* Copy the matrix ... This isn't the most efficient means,
573        but it's quick for now */
574     row = mdn->rstart; m = mdn->A->m;
575     for (i=0; i<m; i++) {
576       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
577       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
578       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
579       row++;
580     }
581 
582     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
583     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
584     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
585     if (!rank) {
586       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
587     }
588     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
589     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
590     ierr = MatDestroy(A);CHKERRQ(ierr);
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatView_MPIDense"
597 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
598 {
599   PetscErrorCode ierr;
600   PetscTruth iascii,isbinary,isdraw,issocket;
601 
602   PetscFunctionBegin;
603 
604   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
605   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
607   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
608 
609   if (iascii || issocket || isdraw) {
610     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
611   } else if (isbinary) {
612     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
613   } else {
614     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatGetInfo_MPIDense"
621 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
622 {
623   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
624   Mat          mdn = mat->A;
625   PetscErrorCode ierr;
626   PetscReal    isend[5],irecv[5];
627 
628   PetscFunctionBegin;
629   info->rows_global    = (double)A->M;
630   info->columns_global = (double)A->N;
631   info->rows_local     = (double)A->m;
632   info->columns_local  = (double)A->N;
633   info->block_size     = 1.0;
634   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
635   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
636   isend[3] = info->memory;  isend[4] = info->mallocs;
637   if (flag == MAT_LOCAL) {
638     info->nz_used      = isend[0];
639     info->nz_allocated = isend[1];
640     info->nz_unneeded  = isend[2];
641     info->memory       = isend[3];
642     info->mallocs      = isend[4];
643   } else if (flag == MAT_GLOBAL_MAX) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   } else if (flag == MAT_GLOBAL_SUM) {
651     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
652     info->nz_used      = irecv[0];
653     info->nz_allocated = irecv[1];
654     info->nz_unneeded  = irecv[2];
655     info->memory       = irecv[3];
656     info->mallocs      = irecv[4];
657   }
658   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
659   info->fill_ratio_needed = 0;
660   info->factor_mallocs    = 0;
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatSetOption_MPIDense"
/*
   MatSetOption_MPIDense - Sets a matrix option, forwarding to the local
   sequential block where the option affects local storage/insertion,
   recording orientation and stashing flags locally, ignoring options that
   have no meaning for dense storage, and rejecting unsupported ones.
*/
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options handled entirely by the local block */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  /* orientation affects how MatSetValues interprets the value array */
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  /* skip the off-process stash entirely in MatSetValues */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  /* symmetry hints carry no state for dense matrices; accepted silently */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatGetRow_MPIDense"
717 PetscErrorCode MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
718 {
719   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
720   PetscErrorCode ierr;
721   int          lrow,rstart = mat->rstart,rend = mat->rend;
722 
723   PetscFunctionBegin;
724   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
725   lrow = row - rstart;
726   ierr = MatGetRow(mat->A,lrow,nz,(const int **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatRestoreRow_MPIDense"
732 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
733 {
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
738   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDiagonalScale_MPIDense"
744 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
745 {
746   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
747   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
748   PetscScalar  *l,*r,x,*v;
749   PetscErrorCode ierr;
750   int          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
751 
752   PetscFunctionBegin;
753   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
754   if (ll) {
755     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
756     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
757     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
758     for (i=0; i<m; i++) {
759       x = l[i];
760       v = mat->v + i;
761       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
762     }
763     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
764     PetscLogFlops(n*m);
765   }
766   if (rr) {
767     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
768     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
769     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
770     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
771     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
772     for (i=0; i<n; i++) {
773       x = r[i];
774       v = mat->v + i*m;
775       for (j=0; j<m; j++) { (*v++) *= x;}
776     }
777     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
778     PetscLogFlops(n*m);
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatNorm_MPIDense"
/*
   MatNorm_MPIDense - Computes a matrix norm.

   Frobenius: sum of |a_ij|^2 over the local block, summed across processes,
   then square-rooted.
   1-norm (max column sum): per-column absolute sums of the local block are
   summed across processes, then the maximum taken (every process stores all
   N columns of its row block).
   Infinity norm (max row sum): rows are not split across processes, so the
   local infinity norm is simply max-reduced.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  int          i,j;
  PetscReal    sum = 0.0;
  PetscScalar  *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* single process: the local block is the whole matrix */
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      PetscLogFlops(2*mdn->A->n*mdn->A->m);
    } else if (type == NORM_1) {
      /* tmp = local column sums, tmp2 = global column sums */
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      /* column-major storage: inner loop walks one column contiguously */
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      PetscLogFlops(A->n*A->m);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatTranspose_MPIDense"
840 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
841 {
842   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
843   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
844   Mat          B;
845   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
846   PetscErrorCode ierr;
847   int          j,i;
848   PetscScalar  *v;
849 
850   PetscFunctionBegin;
851   if (!matout && M != N) {
852     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
853   }
854   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
855   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
856   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
857 
858   m = a->A->m; n = a->A->n; v = Aloc->v;
859   ierr = PetscMalloc(n*sizeof(int),&rwork);CHKERRQ(ierr);
860   for (j=0; j<n; j++) {
861     for (i=0; i<m; i++) rwork[i] = rstart + i;
862     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
863     v   += m;
864   }
865   ierr = PetscFree(rwork);CHKERRQ(ierr);
866   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
868   if (matout) {
869     *matout = B;
870   } else {
871     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
872   }
873   PetscFunctionReturn(0);
874 }
875 
876 #include "petscblaslapack.h"
877 #undef __FUNCT__
878 #define __FUNCT__ "MatScale_MPIDense"
879 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
880 {
881   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
882   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
883   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
884 
885   PetscFunctionBegin;
886   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
887   PetscLogFlops(nz);
888   PetscFunctionReturn(0);
889 }
890 
891 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
895 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
896 {
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 /* -------------------------------------------------------------------*/
905 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
906        MatGetRow_MPIDense,
907        MatRestoreRow_MPIDense,
908        MatMult_MPIDense,
909 /* 4*/ MatMultAdd_MPIDense,
910        MatMultTranspose_MPIDense,
911        MatMultTransposeAdd_MPIDense,
912        0,
913        0,
914        0,
915 /*10*/ 0,
916        0,
917        0,
918        0,
919        MatTranspose_MPIDense,
920 /*15*/ MatGetInfo_MPIDense,
921        0,
922        MatGetDiagonal_MPIDense,
923        MatDiagonalScale_MPIDense,
924        MatNorm_MPIDense,
925 /*20*/ MatAssemblyBegin_MPIDense,
926        MatAssemblyEnd_MPIDense,
927        0,
928        MatSetOption_MPIDense,
929        MatZeroEntries_MPIDense,
930 /*25*/ MatZeroRows_MPIDense,
931        0,
932        0,
933        0,
934        0,
935 /*30*/ MatSetUpPreallocation_MPIDense,
936        0,
937        0,
938        MatGetArray_MPIDense,
939        MatRestoreArray_MPIDense,
940 /*35*/ MatDuplicate_MPIDense,
941        0,
942        0,
943        0,
944        0,
945 /*40*/ 0,
946        MatGetSubMatrices_MPIDense,
947        0,
948        MatGetValues_MPIDense,
949        0,
950 /*45*/ 0,
951        MatScale_MPIDense,
952        0,
953        0,
954        0,
955 /*50*/ MatGetBlockSize_MPIDense,
956        0,
957        0,
958        0,
959        0,
960 /*55*/ 0,
961        0,
962        0,
963        0,
964        0,
965 /*60*/ MatGetSubMatrix_MPIDense,
966        MatDestroy_MPIDense,
967        MatView_MPIDense,
968        MatGetPetscMaps_Petsc,
969        0,
970 /*65*/ 0,
971        0,
972        0,
973        0,
974        0,
975 /*70*/ 0,
976        0,
977        0,
978        0,
979        0,
980 /*75*/ 0,
981        0,
982        0,
983        0,
984        0,
985 /*80*/ 0,
986        0,
987        0,
988        0,
989 /*84*/ MatLoad_MPIDense,
990        0,
991        0,
992        0,
993        0,
994        0,
995 /*90*/ 0,
996        0,
997        0,
998        0,
999        0,
1000 /*95*/ 0,
1001        0,
1002        0,
1003        0};
1004 
1005 EXTERN_C_BEGIN
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1008 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1009 {
1010   Mat_MPIDense *a;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   mat->preallocated = PETSC_TRUE;
1015   /* Note:  For now, when data is specified above, this assumes the user correctly
1016    allocates the local dense storage space.  We should add error checking. */
1017 
1018   a    = (Mat_MPIDense*)mat->data;
1019   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1020   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1021   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1022   PetscLogObjectParent(mat,a->A);
1023   PetscFunctionReturn(0);
1024 }
1025 EXTERN_C_END
1026 
1027 /*MC
1028    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1029 
1030    Options Database Keys:
1031 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1032 
1033   Level: beginner
1034 
.seealso: MatCreateMPIDense()
1036 M*/
1037 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Constructor registered for the MATMPIDENSE type.

   Allocates the Mat_MPIDense container, installs the operation table,
   determines the parallel row/column layout, builds the per-process
   ownership tables, and composes the type-specific functions.  The local
   sequential dense matrix (a->A) is created later, by
   MatMPIDenseSetPreallocation().
*/
PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;
  PetscErrorCode ierr;
  int          i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  /* fill in whichever of the local/global dimensions the caller left as
     PETSC_DECIDE/PETSC_DETERMINE */
  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* cowners shares the rowners allocation: each table uses size+1 entries */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
  /* gather every process's local row count, then prefix-sum so that
     rowners[i] is the global index of the first row owned by process i */
  ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  /* same prefix-sum construction for the column ownership table */
  ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* expose the type-specific entry points queried by the generic wrappers */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1103 
1104 /*MC
1105    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1106 
1107    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1108    and MATMPIDENSE otherwise.
1109 
1110    Options Database Keys:
1111 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1112 
1113   Level: beginner
1114 
.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1116 M*/
1117 
1118 EXTERN_C_BEGIN
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatCreate_Dense"
1121 PetscErrorCode MatCreate_Dense(Mat A)
1122 {
1123   PetscErrorCode ierr;
1124   int size;
1125 
1126   PetscFunctionBegin;
1127   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1128   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1129   if (size == 1) {
1130     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1131   } else {
1132     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 EXTERN_C_END
1137 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  /* dispatch to the implementation composed by the matrix type; if the type
     did not register one (non-MPIDense matrix) this call is a silent no-op */
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatCreateMPIDense"
1178 /*@C
1179    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1180 
1181    Collective on MPI_Comm
1182 
1183    Input Parameters:
1184 +  comm - MPI communicator
1185 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1186 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1187 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1188 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1189 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1190    to control all matrix memory allocation.
1191 
1192    Output Parameter:
1193 .  A - the matrix
1194 
1195    Notes:
1196    The dense format is fully compatible with standard Fortran 77
1197    storage by columns.
1198 
1199    The data input variable is intended primarily for Fortran programmers
1200    who wish to allocate their own matrix memory space.  Most users should
1201    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1202 
1203    The user MUST specify either the local or global matrix dimensions
1204    (possibly both).
1205 
1206    Level: intermediate
1207 
1208 .keywords: matrix,dense, parallel
1209 
1210 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1211 @*/
1212 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1213 {
1214   PetscErrorCode ierr;
1215   int size;
1216 
1217   PetscFunctionBegin;
1218   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1219   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1220   if (size > 1) {
1221     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1222     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1223   } else {
1224     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1225     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1226   }
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatDuplicate_MPIDense"
1232 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1233 {
1234   Mat          mat;
1235   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   *newmat       = 0;
1240   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1241   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1242   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1243   mat->data         = (void*)a;
1244   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1245   mat->factor       = A->factor;
1246   mat->assembled    = PETSC_TRUE;
1247   mat->preallocated = PETSC_TRUE;
1248 
1249   a->rstart       = oldmat->rstart;
1250   a->rend         = oldmat->rend;
1251   a->size         = oldmat->size;
1252   a->rank         = oldmat->rank;
1253   mat->insertmode = NOT_SET_VALUES;
1254   a->nvec         = oldmat->nvec;
1255   a->donotstash   = oldmat->donotstash;
1256   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1257   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1258   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1259   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1260 
1261   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1262   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1263   PetscLogObjectParent(mat,a->A);
1264   *newmat = mat;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #include "petscsys.h"
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1272 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
1273 {
1274   PetscErrorCode ierr;
1275   PetscMPIInt    rank,size;
1276   int            *rowners,i,m,nz,j;
1277   PetscScalar    *array,*vals,*vals_ptr;
1278   MPI_Status     status;
1279 
1280   PetscFunctionBegin;
1281   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1282   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1283 
1284   /* determine ownership of all rows */
1285   m          = M/size + ((M % size) > rank);
1286   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1287   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1288   rowners[0] = 0;
1289   for (i=2; i<=size; i++) {
1290     rowners[i] += rowners[i-1];
1291   }
1292 
1293   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1294   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1295   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1296   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1297 
1298   if (!rank) {
1299     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1300 
1301     /* read in my part of the matrix numerical values  */
1302     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1303 
1304     /* insert into matrix-by row (this is why cannot directly read into array */
1305     vals_ptr = vals;
1306     for (i=0; i<m; i++) {
1307       for (j=0; j<N; j++) {
1308         array[i + j*m] = *vals_ptr++;
1309       }
1310     }
1311 
1312     /* read in other processors and ship out */
1313     for (i=1; i<size; i++) {
1314       nz   = (rowners[i+1] - rowners[i])*N;
1315       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1316       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1317     }
1318   } else {
1319     /* receive numeric values */
1320     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1321 
1322     /* receive message of values*/
1323     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1324 
1325     /* insert into matrix-by row (this is why cannot directly read into array */
1326     vals_ptr = vals;
1327     for (i=0; i<m; i++) {
1328       for (j=0; j<N; j++) {
1329         array[i + j*m] = *vals_ptr++;
1330       }
1331     }
1332   }
1333   ierr = PetscFree(rowners);CHKERRQ(ierr);
1334   ierr = PetscFree(vals);CHKERRQ(ierr);
1335   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1336   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
/*
   MatLoad_MPIDense - Loads a matrix from a PETSc binary viewer into a
   parallel dense matrix.

   The file may contain either a matrix stored in PETSc's sparse binary
   format (header, row lengths, column indices, values) or a dense dump
   (signalled by nz == MATRIX_BINARY_FORMAT_DENSE), which is delegated to
   MatLoad_MPIDense_DenseInFile().  Process 0 does all file reading and
   ships each process its portion with point-to-point messages.
*/
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int            i,nz,j,rstart,rend,fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; it validates the header cookie */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  /* NOTE(review): fd is passed on all ranks but only initialized on rank 0;
     it appears only rank 0 ever reads from it downstream — verify */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows (remainder rows go to the lower ranks);
     rowners[i] becomes the global first row of process i after the prefix sum */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* ourlens and offlens share one allocation; each holds rend-rstart entries */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* the on/off-diagonal split is computed then undone below; the dense type
     ignores preallocation counts, so ourlens is restored to full row lengths */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1519 
1520 
1521 
1522 
1523 
1524