xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
1 /*
2    Basic functions for parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 EXTERN_C_BEGIN
8 #undef __FUNCT__
9 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
10 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
11 {
12   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
13   PetscErrorCode ierr;
14   int          m = A->m,rstart = mdn->rstart;
15   PetscScalar  *array;
16   MPI_Comm     comm;
17 
18   PetscFunctionBegin;
19   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
20 
21   /* The reuse aspect is not implemented efficiently */
22   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
23 
24   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
25   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
26   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
27   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
28   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
29   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
30   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
31   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
32 
33   *iscopy = PETSC_TRUE;
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatSetValues_MPIDense"
40 PetscErrorCode MatSetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],const PetscScalar v[],InsertMode addv)
41 {
42   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
43   PetscErrorCode ierr;
44   int          i,j,rstart = A->rstart,rend = A->rend,row;
45   PetscTruth   roworiented = A->roworiented;
46 
47   PetscFunctionBegin;
48   for (i=0; i<m; i++) {
49     if (idxm[i] < 0) continue;
50     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
51     if (idxm[i] >= rstart && idxm[i] < rend) {
52       row = idxm[i] - rstart;
53       if (roworiented) {
54         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
55       } else {
56         for (j=0; j<n; j++) {
57           if (idxn[j] < 0) continue;
58           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
59           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
60         }
61       }
62     } else {
63       if (!A->donotstash) {
64         if (roworiented) {
65           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
66         } else {
67           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
68         }
69       }
70     }
71   }
72   PetscFunctionReturn(0);
73 }
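
/*
   Editor's note (an illustration added to this listing, not part of the
   original source): the two layouts of v handled above, for a 2x2 block
   (m = n = 2) with entries a00,a01,a10,a11:

      row oriented:     v = {a00,a01,a10,a11},  entry (i,j) at v[i*n + j]
      column oriented:  v = {a00,a10,a01,a11},  entry (i,j) at v[i + j*m]

   This is why the row-oriented branch can hand a whole row (v+i*n) to
   MatSetValues() in one call, while the column-oriented branch inserts one
   entry at a time using the stride-m offset v+i+j*m.
*/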
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatGetValues_MPIDense"
77 PetscErrorCode MatGetValues_MPIDense(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
78 {
79   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
80   PetscErrorCode ierr;
81   int          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
82 
83   PetscFunctionBegin;
84   for (i=0; i<m; i++) {
85     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87     if (idxm[i] >= rstart && idxm[i] < rend) {
88       row = idxm[i] - rstart;
89       for (j=0; j<n; j++) {
90         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91         if (idxn[j] >= mat->N) {
92           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93         }
94         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
95       }
96     } else {
97       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98     }
99   }
100   PetscFunctionReturn(0);
101 }
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatGetArray_MPIDense"
105 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
106 {
107   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
112   PetscFunctionReturn(0);
113 }
114 
115 #undef __FUNCT__
116 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
117 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118 {
119   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121   PetscErrorCode ierr;
122   int          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
123   PetscScalar  *av,*bv,*v = lmat->v;
124   Mat          newmat;
125 
126   PetscFunctionBegin;
127   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
128   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
129   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
130   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
131 
132   /* No parallel redistribution is currently supported! We should really check each index
133      set to confirm that it is OK. Currently this supports only submatrices with the same
134      row partitioning as the original matrix! */
135 
136   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
137   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
138 
139   /* Check submatrix call */
140   if (scall == MAT_REUSE_MATRIX) {
141     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
142     /* Really need to test row and column sizes! */
143     newmat = *B;
144   } else {
145     /* Create and fill new matrix */
146     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
147     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
148     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
149   }
150 
151   /* Now extract the data pointers and do the copy, column at a time */
152   newmatd = (Mat_MPIDense*)newmat->data;
153   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
154 
155   for (i=0; i<ncols; i++) {
156     av = v + nlrows*icol[i];
157     for (j=0; j<nrows; j++) {
158       *bv++ = av[irow[j] - rstart];
159     }
160   }
161 
162   /* Assemble the matrices so that the correct flags are set */
163   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165 
166   /* Free work space */
167   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
168   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
169   *B = newmat;
170   PetscFunctionReturn(0);
171 }
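
/*
   Editor's sketch of the copy loop above (assuming, as in Mat_SeqDense, that
   lmat->v is stored column-major with leading dimension nlrows): column
   icol[i] of the original local block starts at av = v + nlrows*icol[i], and
   row irow[j] of the submatrix is av[irow[j]-rstart].  For example, with
   nlrows = 3, icol = {2}, and irow = {rstart, rstart+2}, the copied entries
   are v[3*2 + 0] and v[3*2 + 2].
*/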
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatRestoreArray_MPIDense"
175 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
176 {
177   PetscFunctionBegin;
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
183 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
184 {
185   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
186   MPI_Comm     comm = mat->comm;
187   PetscErrorCode ierr;
188   int          nstash,reallocs;
189   InsertMode   addv;
190 
191   PetscFunctionBegin;
192   /* make sure all processors are either in INSERTMODE or ADDMODE */
193   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
194   if (addv == (ADD_VALUES|INSERT_VALUES)) {
195     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
196   }
197   mat->insertmode = addv; /* in case this processor had no cache */
198 
199   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
200   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
201   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
207 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
208 {
209   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
210   PetscErrorCode ierr;
211   int          i,n,*row,*col,flg,j,rstart,ncols;
212   PetscScalar  *val;
213   InsertMode   addv=mat->insertmode;
214 
215   PetscFunctionBegin;
216   /*  wait on receives */
217   while (1) {
218     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
219     if (!flg) break;
220 
221     for (i=0; i<n;) {
222       /* Now identify the consecutive vals belonging to the same row */
223       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
224       if (j < n) ncols = j-i;
225       else       ncols = n-i;
226       /* Now assemble all these values with a single function call */
227       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
228       i = j;
229     }
230   }
231   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
232 
233   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
234   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
235 
236   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
237     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
238   }
239   PetscFunctionReturn(0);
240 }
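
/*
   Editor's usage sketch (hypothetical values; the PETSc calling sequence of
   this file): any process may set entries in rows it does not own.
   MatAssemblyBegin() ships such entries through the stash, and
   MatAssemblyEnd() above drains them into MatSetValues_MPIDense().

     PetscScalar one = 1.0;
     int         row = 0;          (possibly owned by another process)
     ierr = MatSetValues(mat,1,&row,1,&row,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/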
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatZeroEntries_MPIDense"
244 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
245 {
246   PetscErrorCode ierr;
247   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
248 
249   PetscFunctionBegin;
250   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatGetBlockSize_MPIDense"
256 PetscErrorCode MatGetBlockSize_MPIDense(Mat A,int *bs)
257 {
258   PetscFunctionBegin;
259   *bs = 1;
260   PetscFunctionReturn(0);
261 }
262 
263 /* The code does not handle the diagonal entries correctly unless the
264    matrix is square and the column and row ownerships are identical.
265    This is a BUG. The only way to fix it seems to be to access
266    the local matrix mdn->A directly rather than going through the
267    MatZeroRows() routine.
268 */
269 #undef __FUNCT__
270 #define __FUNCT__ "MatZeroRows_MPIDense"
271 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
272 {
273   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
274   PetscErrorCode ierr;
275   int            i,N,*rows,*owners = l->rowners,size = l->size;
276   int            *nprocs,j,idx,nsends;
277   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
278   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
279   int            *lens,imdex,*lrows,*values;
280   MPI_Comm       comm = A->comm;
281   MPI_Request    *send_waits,*recv_waits;
282   MPI_Status     recv_status,*send_status;
283   IS             istmp;
284   PetscTruth     found;
285 
286   PetscFunctionBegin;
287   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
288   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
289 
290   /*  first count number of contributors to each processor */
291   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
292   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
293   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
294   for (i=0; i<N; i++) {
295     idx = rows[i];
296     found = PETSC_FALSE;
297     for (j=0; j<size; j++) {
298       if (idx >= owners[j] && idx < owners[j+1]) {
299         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
300       }
301     }
302     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
303   }
304   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
305 
306   /* inform other processors of number of messages and max length*/
307   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
308 
309   /* post receives:   */
310   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
311   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
312   for (i=0; i<nrecvs; i++) {
313     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
314   }
315 
316   /* do sends:
317       1) starts[i] gives the starting index in svalues for stuff going to
318          the ith processor
319   */
320   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
321   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
322   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
323   starts[0]  = 0;
324   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
325   for (i=0; i<N; i++) {
326     svalues[starts[owner[i]]++] = rows[i];
327   }
328   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
329 
330   starts[0] = 0;
331   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
332   count = 0;
333   for (i=0; i<size; i++) {
334     if (nprocs[2*i+1]) {
335       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
336     }
337   }
338   ierr = PetscFree(starts);CHKERRQ(ierr);
339 
340   base = owners[rank];
341 
342   /*  wait on receives */
343   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
344   source = lens + nrecvs;
345   count  = nrecvs; slen = 0;
346   while (count) {
347     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
348     /* unpack receives into our local space */
349     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
350     source[imdex]  = recv_status.MPI_SOURCE;
351     lens[imdex]  = n;
352     slen += n;
353     count--;
354   }
355   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
356 
357   /* move the data into the send scatter */
358   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
359   count = 0;
360   for (i=0; i<nrecvs; i++) {
361     values = rvalues + i*nmax;
362     for (j=0; j<lens[i]; j++) {
363       lrows[count++] = values[j] - base;
364     }
365   }
366   ierr = PetscFree(rvalues);CHKERRQ(ierr);
367   ierr = PetscFree(lens);CHKERRQ(ierr);
368   ierr = PetscFree(owner);CHKERRQ(ierr);
369   ierr = PetscFree(nprocs);CHKERRQ(ierr);
370 
371   /* actually zap the local rows */
372   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
373   PetscLogObjectParent(A,istmp);
374   ierr = PetscFree(lrows);CHKERRQ(ierr);
375   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
376   ierr = ISDestroy(istmp);CHKERRQ(ierr);
377 
378   /* wait on sends */
379   if (nsends) {
380     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
381     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
382     ierr = PetscFree(send_status);CHKERRQ(ierr);
383   }
384   ierr = PetscFree(send_waits);CHKERRQ(ierr);
385   ierr = PetscFree(svalues);CHKERRQ(ierr);
386 
387   PetscFunctionReturn(0);
388 }
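
/*
   Editor's usage sketch (the PETSc calling sequence of this file): each
   process may list global rows owned by any process; the rendezvous code
   above routes every index to its owner before the local MatZeroRows().

     IS          istmp;
     int         rows[2] = {0,5};        (global row numbers)
     PetscScalar diag = 1.0;             (value placed on the diagonal)
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&istmp);CHKERRQ(ierr);
     ierr = MatZeroRows(A,istmp,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(istmp);CHKERRQ(ierr);
*/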
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatMult_MPIDense"
392 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
393 {
394   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
395   PetscErrorCode ierr;
396 
397   PetscFunctionBegin;
398   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
399   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
400   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "MatMultAdd_MPIDense"
406 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
407 {
408   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
409   PetscErrorCode ierr;
410 
411   PetscFunctionBegin;
412   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
413   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
414   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "MatMultTranspose_MPIDense"
420 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
421 {
422   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
423   PetscErrorCode ierr;
424   PetscScalar  zero = 0.0;
425 
426   PetscFunctionBegin;
427   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
428   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
429   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
430   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
431   PetscFunctionReturn(0);
432 }
433 
434 #undef __FUNCT__
435 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
436 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
437 {
438   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
439   PetscErrorCode ierr;
440 
441   PetscFunctionBegin;
442   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
443   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
444   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
445   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
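
/*
   Editor's note: the four multiply routines above share one communication
   pattern.  MatMult/MatMultAdd scatter the column-distributed input forward
   into the sequential work vector lvec and then apply the local m x N block;
   the transpose routines apply the local transpose first and scatter the
   result back in reverse with ADD_VALUES, summing contributions from all
   processes.  A minimal usage sketch (comm is a hypothetical communicator):

     Vec x,y;
     int m,n,M,N;
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,n,N,&x);CHKERRQ(ierr);    (column layout)
     ierr = VecCreateMPI(comm,m,M,&y);CHKERRQ(ierr);    (row layout)
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/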
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatGetDiagonal_MPIDense"
451 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
452 {
453   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
454   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
455   PetscErrorCode ierr;
456   int          len,i,n,m = A->m,radd;
457   PetscScalar  *x,zero = 0.0;
458 
459   PetscFunctionBegin;
460   ierr = VecSet(&zero,v);CHKERRQ(ierr);
461   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
462   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
463   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
464   len  = PetscMin(a->A->m,a->A->n);
465   radd = a->rstart*m;
466   for (i=0; i<len; i++) {
467     x[i] = aloc->v[radd + i*m + i];
468   }
469   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
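
/*
   Editor's worked example for the offset arithmetic above (the local array
   aloc->v is column-major, m rows by N columns): global diagonal entry
   (rstart+i,rstart+i) sits in local row i of global column rstart+i, i.e. at
   v[i + (rstart+i)*m] = v[rstart*m + i*m + i] = v[radd + i*m + i].  For
   m = 2 and rstart = 2 (the second of two processes holding a 4x4 matrix),
   entry (2,2) is at v[4] and entry (3,3) is at v[7].
*/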
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatDestroy_MPIDense"
475 PetscErrorCode MatDestroy_MPIDense(Mat mat)
476 {
477   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
478   PetscErrorCode ierr;
479 
480   PetscFunctionBegin;
481 
482 #if defined(PETSC_USE_LOG)
483   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
484 #endif
485   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
486   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
487   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
488   if (mdn->lvec)   VecDestroy(mdn->lvec);
489   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
490   if (mdn->factor) {
491     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
492     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
493     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
494     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
495   }
496   ierr = PetscFree(mdn);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatView_MPIDense_Binary"
502 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
503 {
504   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   if (mdn->size == 1) {
509     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
510   }
511   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNCT__
516 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
517 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
518 {
519   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
520   PetscErrorCode ierr;
521   int               size = mdn->size,rank = mdn->rank;
522   PetscViewerType   vtype;
523   PetscTruth        iascii,isdraw;
524   PetscViewer       sviewer;
525   PetscViewerFormat format;
526 
527   PetscFunctionBegin;
528   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
529   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
530   if (iascii) {
531     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
532     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
533     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
534       MatInfo info;
535       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
536       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
537                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
538       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
539       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
540       PetscFunctionReturn(0);
541     } else if (format == PETSC_VIEWER_ASCII_INFO) {
542       PetscFunctionReturn(0);
543     }
544   } else if (isdraw) {
545     PetscDraw       draw;
546     PetscTruth isnull;
547 
548     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
549     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
550     if (isnull) PetscFunctionReturn(0);
551   }
552 
553   if (size == 1) {
554     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
555   } else {
556     /* assemble the entire matrix onto first processor. */
557     Mat               A;
558     int               M = mat->M,N = mat->N,m,row,i,nz;
559     const int         *cols;
560     const PetscScalar *vals;
561 
562     if (!rank) {
563       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
564     } else {
565       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
566     }
567     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
568     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
569     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
570     PetscLogObjectParent(mat,A);
571 
572     /* Copy the matrix ... This isn't the most efficient means,
573        but it's quick for now */
574     row = mdn->rstart; m = mdn->A->m;
575     for (i=0; i<m; i++) {
576       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
577       ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
578       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
579       row++;
580     }
581 
582     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
583     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
584     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
585     if (!rank) {
586       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
587     }
588     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
589     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
590     ierr = MatDestroy(A);CHKERRQ(ierr);
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatView_MPIDense"
597 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
598 {
599   PetscErrorCode ierr;
600   PetscTruth iascii,isbinary,isdraw,issocket;
601 
602   PetscFunctionBegin;
603 
604   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
605   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
606   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
607   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
608 
609   if (iascii || issocket || isdraw) {
610     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
611   } else if (isbinary) {
612     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
613   } else {
614     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatGetInfo_MPIDense"
621 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
622 {
623   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
624   Mat          mdn = mat->A;
625   PetscErrorCode ierr;
626   PetscReal    isend[5],irecv[5];
627 
628   PetscFunctionBegin;
629   info->rows_global    = (double)A->M;
630   info->columns_global = (double)A->N;
631   info->rows_local     = (double)A->m;
632   info->columns_local  = (double)A->N;
633   info->block_size     = 1.0;
634   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
635   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
636   isend[3] = info->memory;  isend[4] = info->mallocs;
637   if (flag == MAT_LOCAL) {
638     info->nz_used      = isend[0];
639     info->nz_allocated = isend[1];
640     info->nz_unneeded  = isend[2];
641     info->memory       = isend[3];
642     info->mallocs      = isend[4];
643   } else if (flag == MAT_GLOBAL_MAX) {
644     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
645     info->nz_used      = irecv[0];
646     info->nz_allocated = irecv[1];
647     info->nz_unneeded  = irecv[2];
648     info->memory       = irecv[3];
649     info->mallocs      = irecv[4];
650   } else if (flag == MAT_GLOBAL_SUM) {
651     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
652     info->nz_used      = irecv[0];
653     info->nz_allocated = irecv[1];
654     info->nz_unneeded  = irecv[2];
655     info->memory       = irecv[3];
656     info->mallocs      = irecv[4];
657   }
658   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
659   info->fill_ratio_needed = 0;
660   info->factor_mallocs    = 0;
661   PetscFunctionReturn(0);
662 }
663 
664 #undef __FUNCT__
665 #define __FUNCT__ "MatSetOption_MPIDense"
666 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
667 {
668   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
669   PetscErrorCode ierr;
670 
671   PetscFunctionBegin;
672   switch (op) {
673   case MAT_NO_NEW_NONZERO_LOCATIONS:
674   case MAT_YES_NEW_NONZERO_LOCATIONS:
675   case MAT_NEW_NONZERO_LOCATION_ERR:
676   case MAT_NEW_NONZERO_ALLOCATION_ERR:
677   case MAT_COLUMNS_SORTED:
678   case MAT_COLUMNS_UNSORTED:
679     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
680     break;
681   case MAT_ROW_ORIENTED:
682     a->roworiented = PETSC_TRUE;
683     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
684     break;
685   case MAT_ROWS_SORTED:
686   case MAT_ROWS_UNSORTED:
687   case MAT_YES_NEW_DIAGONALS:
688   case MAT_USE_HASH_TABLE:
689     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
690     break;
691   case MAT_COLUMN_ORIENTED:
692     a->roworiented = PETSC_FALSE;
693     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
694     break;
695   case MAT_IGNORE_OFF_PROC_ENTRIES:
696     a->donotstash = PETSC_TRUE;
697     break;
698   case MAT_NO_NEW_DIAGONALS:
699     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
700   case MAT_SYMMETRIC:
701   case MAT_STRUCTURALLY_SYMMETRIC:
702   case MAT_NOT_SYMMETRIC:
703   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
704   case MAT_HERMITIAN:
705   case MAT_NOT_HERMITIAN:
706   case MAT_SYMMETRY_ETERNAL:
707   case MAT_NOT_SYMMETRY_ETERNAL:
708     break;
709   default:
710     SETERRQ(PETSC_ERR_SUP,"unknown option");
711   }
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatGetRow_MPIDense"
717 PetscErrorCode MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
718 {
719   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
720   PetscErrorCode ierr;
721   int          lrow,rstart = mat->rstart,rend = mat->rend;
722 
723   PetscFunctionBegin;
724   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows currently supported");
725   lrow = row - rstart;
726   ierr = MatGetRow(mat->A,lrow,nz,(const int **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatRestoreRow_MPIDense"
732 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
733 {
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
738   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDiagonalScale_MPIDense"
744 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
745 {
746   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
747   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
748   PetscScalar  *l,*r,x,*v;
749   PetscErrorCode ierr;
750   int          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
751 
752   PetscFunctionBegin;
753   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
754   if (ll) {
755     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
756     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
757     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
758     for (i=0; i<m; i++) {
759       x = l[i];
760       v = mat->v + i;
761       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
762     }
763     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
764     PetscLogFlops(n*m);
765   }
766   if (rr) {
767     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
768     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
769     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
770     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
771     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
772     for (i=0; i<n; i++) {
773       x = r[i];
774       v = mat->v + i*m;
775       for (j=0; j<m; j++) { (*v++) *= x;}
776     }
777     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
778     PetscLogFlops(n*m);
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatNorm_MPIDense"
785 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
786 {
787   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
788   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
789   PetscErrorCode ierr;
790   int          i,j;
791   PetscReal    sum = 0.0;
792   PetscScalar  *v = mat->v;
793 
794   PetscFunctionBegin;
795   if (mdn->size == 1) {
796     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
797   } else {
798     if (type == NORM_FROBENIUS) {
799       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
800 #if defined(PETSC_USE_COMPLEX)
801         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
802 #else
803         sum += (*v)*(*v); v++;
804 #endif
805       }
806       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
807       *nrm = sqrt(*nrm);
808       PetscLogFlops(2*mdn->A->n*mdn->A->m);
809     } else if (type == NORM_1) {
810       PetscReal *tmp,*tmp2;
811       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
812       tmp2 = tmp + A->N;
813       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
814       *nrm = 0.0;
815       v = mat->v;
816       for (j=0; j<mdn->A->n; j++) {
817         for (i=0; i<mdn->A->m; i++) {
818           tmp[j] += PetscAbsScalar(*v);  v++;
819         }
820       }
821       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
822       for (j=0; j<A->N; j++) {
823         if (tmp2[j] > *nrm) *nrm = tmp2[j];
824       }
825       ierr = PetscFree(tmp);CHKERRQ(ierr);
826       PetscLogFlops(A->n*A->m);
827     } else if (type == NORM_INFINITY) { /* max row norm */
828       PetscReal ntemp;
829       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
830       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
831     } else {
832       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
833     }
834   }
835   PetscFunctionReturn(0);
836 }
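
/*
   Editor's summary of the reductions above (with the local block stored
   column by column):

     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2); local sums of squares are
                     combined with MPI_SUM, then one square root.
     NORM_1:         max_j sum_i |a_ij|; per-column absolute sums are
                     combined with MPI_SUM across the row partition, then
                     the maximum over columns is taken redundantly.
     NORM_INFINITY:  max_i sum_j |a_ij|; each process stores whole rows,
                     so local row-norm maxima are combined with MPI_MAX.
*/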
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatTranspose_MPIDense"
840 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
841 {
842   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
843   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
844   Mat          B;
845   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
846   PetscErrorCode ierr;
847   int          j,i;
848   PetscScalar  *v;
849 
850   PetscFunctionBegin;
851   if (!matout && M != N) {
852     SETERRQ(PETSC_ERR_SUP,"In-place transpose requires a square matrix");
853   }
854   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
855   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
856   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
857 
858   m = a->A->m; n = a->A->n; v = Aloc->v;
859   ierr = PetscMalloc(m*sizeof(int),&rwork);CHKERRQ(ierr); /* one global row index per local row */
860   for (i=0; i<m; i++) rwork[i] = rstart + i;               /* the row indices are the same for every column */
861   for (j=0; j<n; j++) {
862     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
863     v   += m;
864   }
865   ierr = PetscFree(rwork);CHKERRQ(ierr);
866   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
868   if (matout) {
869     *matout = B;
870   } else {
871     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
872   }
873   PetscFunctionReturn(0);
874 }
875 
876 #include "petscblaslapack.h"
877 #undef __FUNCT__
878 #define __FUNCT__ "MatScale_MPIDense"
879 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
880 {
881   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
882   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
883   int          one = 1,nz;
884 
885   PetscFunctionBegin;
886   nz = inA->m*inA->N;
887   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
888   PetscLogFlops(nz);
889   PetscFunctionReturn(0);
890 }
891 
892 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
896 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
897 {
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
902   PetscFunctionReturn(0);
903 }
904 
905 /* -------------------------------------------------------------------*/
906 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
907        MatGetRow_MPIDense,
908        MatRestoreRow_MPIDense,
909        MatMult_MPIDense,
910 /* 4*/ MatMultAdd_MPIDense,
911        MatMultTranspose_MPIDense,
912        MatMultTransposeAdd_MPIDense,
913        0,
914        0,
915        0,
916 /*10*/ 0,
917        0,
918        0,
919        0,
920        MatTranspose_MPIDense,
921 /*15*/ MatGetInfo_MPIDense,
922        0,
923        MatGetDiagonal_MPIDense,
924        MatDiagonalScale_MPIDense,
925        MatNorm_MPIDense,
926 /*20*/ MatAssemblyBegin_MPIDense,
927        MatAssemblyEnd_MPIDense,
928        0,
929        MatSetOption_MPIDense,
930        MatZeroEntries_MPIDense,
931 /*25*/ MatZeroRows_MPIDense,
932        0,
933        0,
934        0,
935        0,
936 /*30*/ MatSetUpPreallocation_MPIDense,
937        0,
938        0,
939        MatGetArray_MPIDense,
940        MatRestoreArray_MPIDense,
941 /*35*/ MatDuplicate_MPIDense,
942        0,
943        0,
944        0,
945        0,
946 /*40*/ 0,
947        MatGetSubMatrices_MPIDense,
948        0,
949        MatGetValues_MPIDense,
950        0,
951 /*45*/ 0,
952        MatScale_MPIDense,
953        0,
954        0,
955        0,
956 /*50*/ MatGetBlockSize_MPIDense,
957        0,
958        0,
959        0,
960        0,
961 /*55*/ 0,
962        0,
963        0,
964        0,
965        0,
966 /*60*/ MatGetSubMatrix_MPIDense,
967        MatDestroy_MPIDense,
968        MatView_MPIDense,
969        MatGetPetscMaps_Petsc,
970        0,
971 /*65*/ 0,
972        0,
973        0,
974        0,
975        0,
976 /*70*/ 0,
977        0,
978        0,
979        0,
980        0,
981 /*75*/ 0,
982        0,
983        0,
984        0,
985        0,
986 /*80*/ 0,
987        0,
988        0,
989        0,
990 /*85*/ MatLoad_MPIDense};
991 
992 EXTERN_C_BEGIN
993 #undef __FUNCT__
994 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
995 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
996 {
997   Mat_MPIDense *a;
998   PetscErrorCode ierr;
999 
1000   PetscFunctionBegin;
1001   mat->preallocated = PETSC_TRUE;
1002   /* Note: For now, when data is specified, this assumes that the user correctly
1003    allocates the local dense storage space.  We should add error checking. */
1004 
1005   a    = (Mat_MPIDense*)mat->data;
1006   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1007   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1008   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1009   PetscLogObjectParent(mat,a->A);
1010   PetscFunctionReturn(0);
1011 }
1012 EXTERN_C_END
1013 
1014 /*MC
1015    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1016 
1017    Options Database Keys:
1018 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1019 
1020   Level: beginner
1021 
1022 .seealso: MatCreateMPIDense()
1023 M*/
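
/*
   Editor's usage sketch for the option above (hypothetical sizes M and N;
   the PETSc calling sequence of this file):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);        (honors -mat_type mpidense)
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/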
1024 
1025 EXTERN_C_BEGIN
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatCreate_MPIDense"
1028 PetscErrorCode MatCreate_MPIDense(Mat mat)
1029 {
1030   Mat_MPIDense *a;
1031   PetscErrorCode ierr;
1032   int          i;
1033 
1034   PetscFunctionBegin;
1035   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1036   mat->data         = (void*)a;
1037   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1038   mat->factor       = 0;
1039   mat->mapping      = 0;
1040 
1041   a->factor       = 0;
1042   mat->insertmode = NOT_SET_VALUES;
1043   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1044   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1045 
1046   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1047   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1048   a->nvec = mat->n;
1049 
1050   /* the information in the maps duplicates the information computed below, eventually
1051      we should remove the duplicate information that is not contained in the maps */
1052   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1053   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1054 
1055   /* build local table of row and column ownerships */
1056   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1057   a->cowners = a->rowners + a->size + 1;
1058   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1059   ierr = MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1060   a->rowners[0] = 0;
1061   for (i=2; i<=a->size; i++) {
1062     a->rowners[i] += a->rowners[i-1];
1063   }
1064   a->rstart = a->rowners[a->rank];
1065   a->rend   = a->rowners[a->rank+1];
1066   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);CHKERRQ(ierr);
1067   a->cowners[0] = 0;
1068   for (i=2; i<=a->size; i++) {
1069     a->cowners[i] += a->cowners[i-1];
1070   }
1071 
1072   /* build cache for off array entries formed */
1073   a->donotstash = PETSC_FALSE;
1074   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1075 
1076   /* stuff used for matrix vector multiply */
1077   a->lvec        = 0;
1078   a->Mvctx       = 0;
1079   a->roworiented = PETSC_TRUE;
1080 
1081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1082                                      "MatGetDiagonalBlock_MPIDense",
1083                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1085                                      "MatMPIDenseSetPreallocation_MPIDense",
1086                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 EXTERN_C_END
1090 
1091 /*MC
1092    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1093 
1094    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1095    and MATMPIDENSE otherwise.
1096 
1097    Options Database Keys:
1098 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1099 
1100   Level: beginner
1101 
1102 .seealso: MatCreateMPIDense(),MATSEQDENSE,MATMPIDENSE
1103 M*/
1104 
1105 EXTERN_C_BEGIN
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatCreate_Dense"
1108 PetscErrorCode MatCreate_Dense(Mat A)
1109 {
1110   PetscErrorCode ierr;
1111   int size;
1112 
1113   PetscFunctionBegin;
1114   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1115   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1116   if (size == 1) {
1117     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1118   } else {
1119     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1120   }
1121   PetscFunctionReturn(0);
1122 }
1123 EXTERN_C_END
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1127 /*@C
1128    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1129 
1130    Not collective
1131 
1132    Input Parameters:
1133 +  A - the matrix
1134 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1135    to control all matrix memory allocation.
1136 
1137    Notes:
1138    The dense format is fully compatible with standard Fortran 77
1139    storage by columns.
1140 
1141    The data input variable is intended primarily for Fortran programmers
1142    who wish to allocate their own matrix memory space.  Most users should
1143    set data=PETSC_NULL.
1144 
1145    Level: intermediate
1146 
1147 .keywords: matrix,dense, parallel
1148 
1149 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1150 @*/
1151 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1152 {
1153   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1154 
1155   PetscFunctionBegin;
1156   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1157   if (f) {
1158     ierr = (*f)(mat,data);CHKERRQ(ierr);
1159   }
1160   PetscFunctionReturn(0);
1161 }
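
/*
   Editor's sketch of the user-supplied-array case described above
   (assumption: the caller provides the full local block, m local rows by N
   global columns, in Fortran column-major order; comm, m, and N are
   hypothetical):

     Mat         A;
     PetscScalar *data;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     (fill data[i + j*m] with the entry for local row i, global column j)
     ierr = MatCreate(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);

   The array is used in place, so it must remain valid for the life of the
   matrix.
*/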
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatCreateMPIDense"
1165 /*@C
1166    MatCreateMPIDense - Creates a parallel matrix in dense format.
1167 
1168    Collective on MPI_Comm
1169 
1170    Input Parameters:
1171 +  comm - MPI communicator
1172 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1173 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1174 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1175 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1176 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1177    to control all matrix memory allocation.
1178 
1179    Output Parameter:
1180 .  A - the matrix
1181 
1182    Notes:
1183    The dense format is fully compatible with standard Fortran 77
1184    storage by columns.
1185 
1186    The data input variable is intended primarily for Fortran programmers
1187    who wish to allocate their own matrix memory space.  Most users should
1188    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1189 
1190    The user MUST specify either the local or global matrix dimensions
1191    (possibly both).
1192 
1193    Level: intermediate
1194 
1195 .keywords: matrix,dense, parallel
1196 
1197 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1198 @*/
1199 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1200 {
1201   PetscErrorCode ierr;
1202   int size;
1203 
1204   PetscFunctionBegin;
1205   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1206   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1207   if (size > 1) {
1208     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1209     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1210   } else {
1211     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1212     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1213   }
1214   PetscFunctionReturn(0);
1215 }
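
/*
   Editor's usage example (a sketch, not from the original source): build a
   small distributed identity in dense format with the routine above.

     Mat         A;
     int         i,rstart,rend,N = 8;     (hypothetical global size)
     PetscScalar one = 1.0;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/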
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatDuplicate_MPIDense"
1219 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1220 {
1221   Mat          mat;
1222   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   *newmat       = 0;
1227   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1228   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1229   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1230   mat->data         = (void*)a;
1231   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1232   mat->factor       = A->factor;
1233   mat->assembled    = PETSC_TRUE;
1234   mat->preallocated = PETSC_TRUE;
1235 
1236   a->rstart       = oldmat->rstart;
1237   a->rend         = oldmat->rend;
1238   a->size         = oldmat->size;
1239   a->rank         = oldmat->rank;
1240   mat->insertmode = NOT_SET_VALUES;
1241   a->nvec         = oldmat->nvec;
1242   a->donotstash   = oldmat->donotstash;
1243   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1244   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1245   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));CHKERRQ(ierr);
1246   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1247 
1248   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1249   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1250   PetscLogObjectParent(mat,a->A);
1251   *newmat = mat;
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #include "petscsys.h"
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1259 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,const MatType type,Mat *newmat)
1260 {
1261   PetscErrorCode ierr;
1262   int          *rowners,i,size,rank,m,nz,j;
1263   PetscScalar  *array,*vals,*vals_ptr;
1264   MPI_Status   status;
1265 
1266   PetscFunctionBegin;
1267   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1268   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1269 
1270   /* determine ownership of all rows */
1271   m          = M/size + ((M % size) > rank);
1272   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1273   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1274   rowners[0] = 0;
1275   for (i=2; i<=size; i++) {
1276     rowners[i] += rowners[i-1];
1277   }
1278 
1279   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1280   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1281   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1282   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1283 
1284   if (!rank) {
1285     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1286 
1287     /* read in my part of the matrix numerical values  */
1288     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1289 
1290     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1291     vals_ptr = vals;
1292     for (i=0; i<m; i++) {
1293       for (j=0; j<N; j++) {
1294         array[i + j*m] = *vals_ptr++;
1295       }
1296     }
1297 
1298     /* read in other processors and ship out */
1299     for (i=1; i<size; i++) {
1300       nz   = (rowners[i+1] - rowners[i])*N;
1301       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1302       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1303     }
1304   } else {
1305     /* receive numeric values */
1306     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1307 
1308     /* receive message of values*/
1309     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1310 
1311     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1312     vals_ptr = vals;
1313     for (i=0; i<m; i++) {
1314       for (j=0; j<N; j++) {
1315         array[i + j*m] = *vals_ptr++;
1316       }
1317     }
1318   }
1319   ierr = PetscFree(rowners);CHKERRQ(ierr);
1320   ierr = PetscFree(vals);CHKERRQ(ierr);
1321   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1322   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatLoad_MPIDense"
1328 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1329 {
1330   Mat          A;
1331   PetscScalar  *vals,*svals;
1332   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1333   MPI_Status   status;
1334   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1335   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1336   int          tag = ((PetscObject)viewer)->tag;
1337   int          i,nz,j,rstart,rend,fd;
1338   PetscErrorCode ierr;
1339 
1340   PetscFunctionBegin;
1341   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1342   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1343   if (!rank) {
1344     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1345     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1346     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1347   }
1348 
1349   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1350   M = header[1]; N = header[2]; nz = header[3];
1351 
1352   /*
1353        Handle case where matrix is stored on disk as a dense matrix
1354   */
1355   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1356     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1357     PetscFunctionReturn(0);
1358   }
1359 
1360   /* determine ownership of all rows */
1361   m          = M/size + ((M % size) > rank);
1362   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1363   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1364   rowners[0] = 0;
1365   for (i=2; i<=size; i++) {
1366     rowners[i] += rowners[i-1];
1367   }
1368   rstart = rowners[rank];
1369   rend   = rowners[rank+1];
1370 
1371   /* distribute row lengths to all processors */
1372   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1373   offlens = ourlens + (rend-rstart);
1374   if (!rank) {
1375     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1376     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1377     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1378     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1379     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1380     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1381   } else {
1382     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1383   }
1384 
1385   if (!rank) {
1386     /* calculate the number of nonzeros on each processor */
1387     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1388     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1389     for (i=0; i<size; i++) {
1390       for (j=rowners[i]; j< rowners[i+1]; j++) {
1391         procsnz[i] += rowlengths[j];
1392       }
1393     }
1394     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1395 
1396     /* determine max buffer needed and allocate it */
1397     maxnz = 0;
1398     for (i=0; i<size; i++) {
1399       maxnz = PetscMax(maxnz,procsnz[i]);
1400     }
1401     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1402 
1403     /* read in my part of the matrix column indices  */
1404     nz = procsnz[0];
1405     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1406     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1407 
1408     /* read in everyone else's part and ship it off */
1409     for (i=1; i<size; i++) {
1410       nz   = procsnz[i];
1411       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1412       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1413     }
1414     ierr = PetscFree(cols);CHKERRQ(ierr);
1415   } else {
1416     /* determine buffer space needed for message */
1417     nz = 0;
1418     for (i=0; i<m; i++) {
1419       nz += ourlens[i];
1420     }
1421     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1422 
1423     /* receive message of column indices*/
1424     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1425     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1426     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1427   }
1428 
1429   /* loop over local rows, determining the number of off-diagonal entries */
1430   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1431   jj = 0;
1432   for (i=0; i<m; i++) {
1433     for (j=0; j<ourlens[i]; j++) {
1434       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1435       jj++;
1436     }
1437   }
1438 
1439   /* create our matrix */
1440   for (i=0; i<m; i++) {
1441     ourlens[i] -= offlens[i];
1442   }
1443   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1444   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1445   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1446   A = *newmat;
1447   for (i=0; i<m; i++) {
1448     ourlens[i] += offlens[i];
1449   }
1450 
1451   if (!rank) {
1452     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1453 
1454     /* read in my part of the matrix numerical values  */
1455     nz = procsnz[0];
1456     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1457 
1458     /* insert into matrix */
1459     jj      = rstart;
1460     smycols = mycols;
1461     svals   = vals;
1462     for (i=0; i<m; i++) {
1463       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1464       smycols += ourlens[i];
1465       svals   += ourlens[i];
1466       jj++;
1467     }
1468 
1469     /* read in other processors and ship out */
1470     for (i=1; i<size; i++) {
1471       nz   = procsnz[i];
1472       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1473       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1474     }
1475     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1476   } else {
1477     /* receive numeric values */
1478     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1479 
1480     /* receive message of values*/
1481     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1482     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1483     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1484 
1485     /* insert into matrix */
1486     jj      = rstart;
1487     smycols = mycols;
1488     svals   = vals;
1489     for (i=0; i<m; i++) {
1490       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1491       smycols += ourlens[i];
1492       svals   += ourlens[i];
1493       jj++;
1494     }
1495   }
1496   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1497   ierr = PetscFree(vals);CHKERRQ(ierr);
1498   ierr = PetscFree(mycols);CHKERRQ(ierr);
1499   ierr = PetscFree(rowners);CHKERRQ(ierr);
1500 
1501   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1502   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
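
/*
   Editor's usage sketch (the MatLoad() calling sequence of this PETSc
   generation; "matrix.dat" and the read-only flag name are assumptions):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/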
1505 