xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatGetRow_MPIDense"
11 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
12 {
13   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
14   PetscErrorCode ierr;
15   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;
16 
17   PetscFunctionBegin;
18   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
19   lrow = row - rstart;
20   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
21   PetscFunctionReturn(0);
22 }
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatRestoreRow_MPIDense"
26 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
27 {
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
32   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
33   PetscFunctionReturn(0);
34 }
35 
36 EXTERN_C_BEGIN
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
39 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
40 {
41   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt          m = A->m,rstart = mdn->rstart;
44   PetscScalar  *array;
45   MPI_Comm     comm;
46 
47   PetscFunctionBegin;
48   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
49 
50   /* The reuse aspect is not implemented efficiently */
51   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
52 
53   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
54   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
55   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
56   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
57   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
58   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
59   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
60   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
61 
62   *iscopy = PETSC_TRUE;
63   PetscFunctionReturn(0);
64 }
65 EXTERN_C_END
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "MatSetValues_MPIDense"
69 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
70 {
71   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
72   PetscErrorCode ierr;
73   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
74   PetscTruth   roworiented = A->roworiented;
75 
76   PetscFunctionBegin;
77   for (i=0; i<m; i++) {
78     if (idxm[i] < 0) continue;
79     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
80     if (idxm[i] >= rstart && idxm[i] < rend) {
81       row = idxm[i] - rstart;
82       if (roworiented) {
83         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
84       } else {
85         for (j=0; j<n; j++) {
86           if (idxn[j] < 0) continue;
87           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
88           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
89         }
90       }
91     } else {
92       if (!A->donotstash) {
93         if (roworiented) {
94           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
95         } else {
96           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
97         }
98       }
99     }
100   }
101   PetscFunctionReturn(0);
102 }
103 
104 #undef __FUNCT__
105 #define __FUNCT__ "MatGetValues_MPIDense"
106 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
107 {
108   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
109   PetscErrorCode ierr;
110   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
111 
112   PetscFunctionBegin;
113   for (i=0; i<m; i++) {
114     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
115     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
116     if (idxm[i] >= rstart && idxm[i] < rend) {
117       row = idxm[i] - rstart;
118       for (j=0; j<n; j++) {
119         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
120         if (idxn[j] >= mat->N) {
121           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
122         }
123         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
124       }
125     } else {
126       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
127     }
128   }
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatGetArray_MPIDense"
134 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
135 {
136   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
137   PetscErrorCode ierr;
138 
139   PetscFunctionBegin;
140   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
141   PetscFunctionReturn(0);
142 }
143 
144 #undef __FUNCT__
145 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
146 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
147 {
148   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
149   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
150   PetscErrorCode ierr;
151   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
152   PetscScalar  *av,*bv,*v = lmat->v;
153   Mat          newmat;
154 
155   PetscFunctionBegin;
156   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
157   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
158   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
159   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
160 
161   /* No parallel redistribution is currently supported! We should really check each index set
162      to confirm that it is OK.  Currently only a submatrix with the same partitioning as the
163      original matrix is supported! */
164 
165   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
166   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
167 
168   /* Check submatrix call */
169   if (scall == MAT_REUSE_MATRIX) {
170     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
171     /* Really need to test rows and column sizes! */
172     newmat = *B;
173   } else {
174     /* Create and fill new matrix */
175     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
176     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
177     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
178   }
179 
180   /* Now extract the data pointers and do the copy, column at a time */
181   newmatd = (Mat_MPIDense*)newmat->data;
182   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
183 
184   for (i=0; i<ncols; i++) {
185     av = v + nlrows*icol[i];
186     for (j=0; j<nrows; j++) {
187       *bv++ = av[irow[j] - rstart];
188     }
189   }
190 
191   /* Assemble the matrices so that the correct flags are set */
192   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
193   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
194 
195   /* Free work space */
196   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
197   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
198   *B = newmat;
199   PetscFunctionReturn(0);
200 }
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "MatRestoreArray_MPIDense"
204 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
205 {
206   PetscFunctionBegin;
207   PetscFunctionReturn(0);
208 }
209 
210 #undef __FUNCT__
211 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
212 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
213 {
214   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
215   MPI_Comm     comm = mat->comm;
216   PetscErrorCode ierr;
217   PetscInt          nstash,reallocs;
218   InsertMode   addv;
219 
220   PetscFunctionBegin;
221   /* make sure all processors are either in INSERTMODE or ADDMODE */
222   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
223   if (addv == (ADD_VALUES|INSERT_VALUES)) {
224     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
225   }
226   mat->insertmode = addv; /* in case this processor had no cache */
227 
228   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
229   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
230   ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
236 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
237 {
238   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
239   PetscErrorCode  ierr;
240   PetscInt        i,*row,*col,flg,j,rstart,ncols;
241   PetscMPIInt     n;
242   PetscScalar     *val;
243   InsertMode      addv=mat->insertmode;
244 
245   PetscFunctionBegin;
246   /*  wait on receives */
247   while (1) {
248     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
249     if (!flg) break;
250 
251     for (i=0; i<n;) {
252       /* Now identify the consecutive vals belonging to the same row */
253       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
254       if (j < n) ncols = j-i;
255       else       ncols = n-i;
256       /* Now assemble all these values with a single function call */
257       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
258       i = j;
259     }
260   }
261   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
262 
263   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
264   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
265 
266   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
267     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatZeroEntries_MPIDense"
274 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
275 {
276   PetscErrorCode ierr;
277   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
278 
279   PetscFunctionBegin;
280   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
284 /* the code does not do the diagonal entries correctly unless the
285    matrix is square and the column and row ownerships are identical.
286    This is a BUG. The only way to fix it seems to be to access
287    mdn->A and mdn->B directly and not through the MatZeroRows()
288    routine.
289 */
290 #undef __FUNCT__
291 #define __FUNCT__ "MatZeroRows_MPIDense"
292 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
293 {
294   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
295   PetscErrorCode ierr;
296   PetscInt       i,N,*rows,*owners = l->rowners;
297   PetscInt       *nprocs,j,idx,nsends;
298   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
299   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
300   PetscInt       *lens,*lrows,*values;
301   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
302   MPI_Comm       comm = A->comm;
303   MPI_Request    *send_waits,*recv_waits;
304   MPI_Status     recv_status,*send_status;
305   IS             istmp;
306   PetscTruth     found;
307 
308   PetscFunctionBegin;
309   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
310   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
311 
312   /*  first count number of contributors to each processor */
313   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
314   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
315   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
316   for (i=0; i<N; i++) {
317     idx = rows[i];
318     found = PETSC_FALSE;
319     for (j=0; j<size; j++) {
320       if (idx >= owners[j] && idx < owners[j+1]) {
321         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
322       }
323     }
324     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
325   }
326   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
327 
328   /* inform other processors of number of messages and max length*/
329   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
330 
331   /* post receives:   */
332   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
333   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
334   for (i=0; i<nrecvs; i++) {
335     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
336   }
337 
338   /* do sends:
339       1) starts[i] gives the starting index in svalues for stuff going to
340          the ith processor
341   */
342   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
343   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
344   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
345   starts[0]  = 0;
346   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
347   for (i=0; i<N; i++) {
348     svalues[starts[owner[i]]++] = rows[i];
349   }
350   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
351 
352   starts[0] = 0;
353   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
354   count = 0;
355   for (i=0; i<size; i++) {
356     if (nprocs[2*i+1]) {
357       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
358     }
359   }
360   ierr = PetscFree(starts);CHKERRQ(ierr);
361 
362   base = owners[rank];
363 
364   /*  wait on receives */
365   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
366   source = lens + nrecvs;
367   count  = nrecvs; slen = 0;
368   while (count) {
369     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
370     /* unpack receives into our local space */
371     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
372     source[imdex]  = recv_status.MPI_SOURCE;
373     lens[imdex]    = n;
374     slen += n;
375     count--;
376   }
377   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
378 
379   /* move the data into the send scatter */
380   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
381   count = 0;
382   for (i=0; i<nrecvs; i++) {
383     values = rvalues + i*nmax;
384     for (j=0; j<lens[i]; j++) {
385       lrows[count++] = values[j] - base;
386     }
387   }
388   ierr = PetscFree(rvalues);CHKERRQ(ierr);
389   ierr = PetscFree(lens);CHKERRQ(ierr);
390   ierr = PetscFree(owner);CHKERRQ(ierr);
391   ierr = PetscFree(nprocs);CHKERRQ(ierr);
392 
393   /* actually zap the local rows */
394   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
395   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
396   ierr = PetscFree(lrows);CHKERRQ(ierr);
397   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
398   ierr = ISDestroy(istmp);CHKERRQ(ierr);
399 
400   /* wait on sends */
401   if (nsends) {
402     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
403     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
404     ierr = PetscFree(send_status);CHKERRQ(ierr);
405   }
406   ierr = PetscFree(send_waits);CHKERRQ(ierr);
407   ierr = PetscFree(svalues);CHKERRQ(ierr);
408 
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatMult_MPIDense"
414 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
415 {
416   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
417   PetscErrorCode ierr;
418 
419   PetscFunctionBegin;
420   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
421   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
422   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatMultAdd_MPIDense"
428 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
429 {
430   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
431   PetscErrorCode ierr;
432 
433   PetscFunctionBegin;
434   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
435   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
436   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNCT__
441 #define __FUNCT__ "MatMultTranspose_MPIDense"
442 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
443 {
444   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
445   PetscErrorCode ierr;
446   PetscScalar  zero = 0.0;
447 
448   PetscFunctionBegin;
449   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
450   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
451   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
452   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
458 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
459 {
460   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
465   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
466   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
467   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatGetDiagonal_MPIDense"
473 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
474 {
475   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
476   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
477   PetscErrorCode ierr;
478   PetscInt          len,i,n,m = A->m,radd;
479   PetscScalar  *x,zero = 0.0;
480 
481   PetscFunctionBegin;
482   ierr = VecSet(&zero,v);CHKERRQ(ierr);
483   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
484   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
485   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
486   len  = PetscMin(a->A->m,a->A->n);
487   radd = a->rstart*m;
488   for (i=0; i<len; i++) {
489     x[i] = aloc->v[radd + i*m + i];
490   }
491   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
492   PetscFunctionReturn(0);
493 }
494 
495 #undef __FUNCT__
496 #define __FUNCT__ "MatDestroy_MPIDense"
497 PetscErrorCode MatDestroy_MPIDense(Mat mat)
498 {
499   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
500   PetscErrorCode ierr;
501 
502   PetscFunctionBegin;
503 
504 #if defined(PETSC_USE_LOG)
505   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
506 #endif
507   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
508   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
509   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
510   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
511   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
512   if (mdn->factor) {
513     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
514     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
515     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
516     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
517   }
518   ierr = PetscFree(mdn);CHKERRQ(ierr);
519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatView_MPIDense_Binary"
526 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
527 {
528   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
529   PetscErrorCode ierr;
530 
531   PetscFunctionBegin;
532   if (mdn->size == 1) {
533     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
534   }
535   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
541 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
542 {
543   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
544   PetscErrorCode    ierr;
545   PetscMPIInt       size = mdn->size,rank = mdn->rank;
546   PetscViewerType   vtype;
547   PetscTruth        iascii,isdraw;
548   PetscViewer       sviewer;
549   PetscViewerFormat format;
550 
551   PetscFunctionBegin;
552   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
553   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
554   if (iascii) {
555     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
556     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
557     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
558       MatInfo info;
559       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
560       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
561                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
562       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
563       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
564       PetscFunctionReturn(0);
565     } else if (format == PETSC_VIEWER_ASCII_INFO) {
566       PetscFunctionReturn(0);
567     }
568   } else if (isdraw) {
569     PetscDraw       draw;
570     PetscTruth isnull;
571 
572     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
573     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
574     if (isnull) PetscFunctionReturn(0);
575   }
576 
577   if (size == 1) {
578     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
579   } else {
580     /* assemble the entire matrix onto first processor. */
581     Mat         A;
582     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
583     PetscInt    *cols;
584     PetscScalar *vals;
585 
586     if (!rank) {
587       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
588     } else {
589       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
590     }
591     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
592     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
593     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
594     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
595 
596     /* Copy the matrix ... This isn't the most efficient means,
597        but it's quick for now */
598     A->insertmode = INSERT_VALUES;
599     row = mdn->rstart; m = mdn->A->m;
600     for (i=0; i<m; i++) {
601       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
602       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
603       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
604       row++;
605     }
606 
607     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
608     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
609     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
610     if (!rank) {
611       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
612     }
613     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
614     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
615     ierr = MatDestroy(A);CHKERRQ(ierr);
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatView_MPIDense"
622 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
623 {
624   PetscErrorCode ierr;
625   PetscTruth iascii,isbinary,isdraw,issocket;
626 
627   PetscFunctionBegin;
628 
629   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
630   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
631   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
632   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
633 
634   if (iascii || issocket || isdraw) {
635     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
636   } else if (isbinary) {
637     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
638   } else {
639     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
640   }
641   PetscFunctionReturn(0);
642 }
643 
644 #undef __FUNCT__
645 #define __FUNCT__ "MatGetInfo_MPIDense"
646 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
647 {
648   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
649   Mat          mdn = mat->A;
650   PetscErrorCode ierr;
651   PetscReal    isend[5],irecv[5];
652 
653   PetscFunctionBegin;
654   info->rows_global    = (double)A->M;
655   info->columns_global = (double)A->N;
656   info->rows_local     = (double)A->m;
657   info->columns_local  = (double)A->N;
658   info->block_size     = 1.0;
659   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
660   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
661   isend[3] = info->memory;  isend[4] = info->mallocs;
662   if (flag == MAT_LOCAL) {
663     info->nz_used      = isend[0];
664     info->nz_allocated = isend[1];
665     info->nz_unneeded  = isend[2];
666     info->memory       = isend[3];
667     info->mallocs      = isend[4];
668   } else if (flag == MAT_GLOBAL_MAX) {
669     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
670     info->nz_used      = irecv[0];
671     info->nz_allocated = irecv[1];
672     info->nz_unneeded  = irecv[2];
673     info->memory       = irecv[3];
674     info->mallocs      = irecv[4];
675   } else if (flag == MAT_GLOBAL_SUM) {
676     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
677     info->nz_used      = irecv[0];
678     info->nz_allocated = irecv[1];
679     info->nz_unneeded  = irecv[2];
680     info->memory       = irecv[3];
681     info->mallocs      = irecv[4];
682   }
683   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
684   info->fill_ratio_needed = 0;
685   info->factor_mallocs    = 0;
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatSetOption_MPIDense"
691 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
692 {
693   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
694   PetscErrorCode ierr;
695 
696   PetscFunctionBegin;
697   switch (op) {
698   case MAT_NO_NEW_NONZERO_LOCATIONS:
699   case MAT_YES_NEW_NONZERO_LOCATIONS:
700   case MAT_NEW_NONZERO_LOCATION_ERR:
701   case MAT_NEW_NONZERO_ALLOCATION_ERR:
702   case MAT_COLUMNS_SORTED:
703   case MAT_COLUMNS_UNSORTED:
704     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
705     break;
706   case MAT_ROW_ORIENTED:
707     a->roworiented = PETSC_TRUE;
708     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
709     break;
710   case MAT_ROWS_SORTED:
711   case MAT_ROWS_UNSORTED:
712   case MAT_YES_NEW_DIAGONALS:
713   case MAT_USE_HASH_TABLE:
714     ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
715     break;
716   case MAT_COLUMN_ORIENTED:
717     a->roworiented = PETSC_FALSE;
718     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
719     break;
720   case MAT_IGNORE_OFF_PROC_ENTRIES:
721     a->donotstash = PETSC_TRUE;
722     break;
723   case MAT_NO_NEW_DIAGONALS:
724     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
725   case MAT_SYMMETRIC:
726   case MAT_STRUCTURALLY_SYMMETRIC:
727   case MAT_NOT_SYMMETRIC:
728   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
729   case MAT_HERMITIAN:
730   case MAT_NOT_HERMITIAN:
731   case MAT_SYMMETRY_ETERNAL:
732   case MAT_NOT_SYMMETRY_ETERNAL:
733     break;
734   default:
735     SETERRQ(PETSC_ERR_SUP,"unknown option");
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatDiagonalScale_MPIDense"
743 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
744 {
745   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
746   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
747   PetscScalar  *l,*r,x,*v;
748   PetscErrorCode ierr;
749   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
750 
751   PetscFunctionBegin;
752   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
753   if (ll) {
754     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
755     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
756     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
757     for (i=0; i<m; i++) {
758       x = l[i];
759       v = mat->v + i;
760       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
761     }
762     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
763     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
764   }
765   if (rr) {
766     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
767     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
768     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
769     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
770     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
771     for (i=0; i<n; i++) {
772       x = r[i];
773       v = mat->v + i*m;
774       for (j=0; j<m; j++) { (*v++) *= x;}
775     }
776     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
777     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatNorm_MPIDense"
784 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
785 {
786   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
787   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
788   PetscErrorCode ierr;
789   PetscInt          i,j;
790   PetscReal    sum = 0.0;
791   PetscScalar  *v = mat->v;
792 
793   PetscFunctionBegin;
794   if (mdn->size == 1) {
795     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
796   } else {
797     if (type == NORM_FROBENIUS) {
798       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
799 #if defined(PETSC_USE_COMPLEX)
800         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
801 #else
802         sum += (*v)*(*v); v++;
803 #endif
804       }
805       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
806       *nrm = sqrt(*nrm);
807       ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
808     } else if (type == NORM_1) {
809       PetscReal *tmp,*tmp2;
810       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
811       tmp2 = tmp + A->N;
812       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
813       *nrm = 0.0;
814       v = mat->v;
815       for (j=0; j<mdn->A->n; j++) {
816         for (i=0; i<mdn->A->m; i++) {
817           tmp[j] += PetscAbsScalar(*v);  v++;
818         }
819       }
820       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
821       for (j=0; j<A->N; j++) {
822         if (tmp2[j] > *nrm) *nrm = tmp2[j];
823       }
824       ierr = PetscFree(tmp);CHKERRQ(ierr);
825       ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
826     } else if (type == NORM_INFINITY) { /* max row norm */
827       PetscReal ntemp;
828       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
829       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
830     } else {
831       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
832     }
833   }
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatTranspose_MPIDense"
839 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
840 {
841   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
842   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
843   Mat          B;
844   PetscInt          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
845   PetscErrorCode ierr;
846   PetscInt          j,i;
847   PetscScalar  *v;
848 
849   PetscFunctionBegin;
850   if (!matout && M != N) {
851     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
852   }
853   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
854   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
855   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
856 
857   m = a->A->m; n = a->A->n; v = Aloc->v;
858   ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
859   for (j=0; j<n; j++) {
860     for (i=0; i<m; i++) rwork[i] = rstart + i;
861     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
862     v   += m;
863   }
864   ierr = PetscFree(rwork);CHKERRQ(ierr);
865   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867   if (matout) {
868     *matout = B;
869   } else {
870     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 #include "petscblaslapack.h"
876 #undef __FUNCT__
877 #define __FUNCT__ "MatScale_MPIDense"
878 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
879 {
880   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
881   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
882   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
883   PetscErrorCode ierr;
884 
885   PetscFunctionBegin;
886   BLASscal_(&nz,(PetscScalar*)alpha,a->v,&one);
887   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
895 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
896 {
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 /* -------------------------------------------------------------------*/
905 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
906        MatGetRow_MPIDense,
907        MatRestoreRow_MPIDense,
908        MatMult_MPIDense,
909 /* 4*/ MatMultAdd_MPIDense,
910        MatMultTranspose_MPIDense,
911        MatMultTransposeAdd_MPIDense,
912        0,
913        0,
914        0,
915 /*10*/ 0,
916        0,
917        0,
918        0,
919        MatTranspose_MPIDense,
920 /*15*/ MatGetInfo_MPIDense,
921        0,
922        MatGetDiagonal_MPIDense,
923        MatDiagonalScale_MPIDense,
924        MatNorm_MPIDense,
925 /*20*/ MatAssemblyBegin_MPIDense,
926        MatAssemblyEnd_MPIDense,
927        0,
928        MatSetOption_MPIDense,
929        MatZeroEntries_MPIDense,
930 /*25*/ MatZeroRows_MPIDense,
931        0,
932        0,
933        0,
934        0,
935 /*30*/ MatSetUpPreallocation_MPIDense,
936        0,
937        0,
938        MatGetArray_MPIDense,
939        MatRestoreArray_MPIDense,
940 /*35*/ MatDuplicate_MPIDense,
941        0,
942        0,
943        0,
944        0,
945 /*40*/ 0,
946        MatGetSubMatrices_MPIDense,
947        0,
948        MatGetValues_MPIDense,
949        0,
950 /*45*/ 0,
951        MatScale_MPIDense,
952        0,
953        0,
954        0,
955 /*50*/ 0,
956        0,
957        0,
958        0,
959        0,
960 /*55*/ 0,
961        0,
962        0,
963        0,
964        0,
965 /*60*/ MatGetSubMatrix_MPIDense,
966        MatDestroy_MPIDense,
967        MatView_MPIDense,
968        MatGetPetscMaps_Petsc,
969        0,
970 /*65*/ 0,
971        0,
972        0,
973        0,
974        0,
975 /*70*/ 0,
976        0,
977        0,
978        0,
979        0,
980 /*75*/ 0,
981        0,
982        0,
983        0,
984        0,
985 /*80*/ 0,
986        0,
987        0,
988        0,
989 /*84*/ MatLoad_MPIDense,
990        0,
991        0,
992        0,
993        0,
994        0,
995 /*90*/ 0,
996        0,
997        0,
998        0,
999        0,
1000 /*95*/ 0,
1001        0,
1002        0,
1003        0};
1004 
1005 EXTERN_C_BEGIN
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1008 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1009 {
1010   Mat_MPIDense *a;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   mat->preallocated = PETSC_TRUE;
1015   /* Note:  For now, when data is provided, this assumes the user has correctly
1016    allocated the local dense storage space.  We should add error checking. */
1017 
1018   a    = (Mat_MPIDense*)mat->data;
1019   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1020   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1021   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1022   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 EXTERN_C_END
1026 
1027 /*MC
1028    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1029 
1030    Options Database Keys:
1031 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1032 
1033   Level: beginner
1034 
1035 .seealso: MatCreateMPIDense
1036 M*/
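
/*
   Illustrative usage sketch (not part of the original source; the global sizes M and N
   are placeholders): one way to request this type explicitly, using the 6-argument
   MatCreate() employed throughout this file. With MatSetFromOptions() the type can
   still be overridden from the options database via -mat_type mpidense.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/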
1037 
1038 EXTERN_C_BEGIN
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatCreate_MPIDense"
1041 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1042 {
1043   Mat_MPIDense *a;
1044   PetscErrorCode ierr;
1045   PetscInt          i;
1046 
1047   PetscFunctionBegin;
1048   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1049   mat->data         = (void*)a;
1050   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1051   mat->factor       = 0;
1052   mat->mapping      = 0;
1053 
1054   a->factor       = 0;
1055   mat->insertmode = NOT_SET_VALUES;
1056   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1057   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1058 
1059   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1060   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1061   a->nvec = mat->n;
1062 
1063   /* the information in the maps duplicates the information computed below, eventually
1064      we should remove the duplicate information that is not contained in the maps */
1065   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1066   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1067 
1068   /* build local table of row and column ownerships */
1069   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1070   a->cowners = a->rowners + a->size + 1;
1071   ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1072   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1073   a->rowners[0] = 0;
1074   for (i=2; i<=a->size; i++) {
1075     a->rowners[i] += a->rowners[i-1];
1076   }
1077   a->rstart = a->rowners[a->rank];
1078   a->rend   = a->rowners[a->rank+1];
1079   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1080   a->cowners[0] = 0;
1081   for (i=2; i<=a->size; i++) {
1082     a->cowners[i] += a->cowners[i-1];
1083   }
1084 
1085   /* build cache for off array entries formed */
1086   a->donotstash = PETSC_FALSE;
1087   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1088 
1089   /* stuff used for matrix vector multiply */
1090   a->lvec        = 0;
1091   a->Mvctx       = 0;
1092   a->roworiented = PETSC_TRUE;
1093 
1094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1095                                      "MatGetDiagonalBlock_MPIDense",
1096                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1098                                      "MatMPIDenseSetPreallocation_MPIDense",
1099                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 EXTERN_C_END
1103 
1104 /*MC
1105    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1106 
1107    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1108    and MATMPIDENSE otherwise.
1109 
1110    Options Database Keys:
1111 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1112 
1113   Level: beginner
1114 
1115 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1116 M*/
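
/*
   Illustrative sketch (not part of the original source; comm, M and N are placeholders):
   with MATDENSE the concrete type is picked from the communicator size, so the same
   code runs sequentially and in parallel.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);   resolves to MATSEQDENSE on one process, MATMPIDENSE otherwise
*/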
1117 
1118 EXTERN_C_BEGIN
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatCreate_Dense"
1121 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1122 {
1123   PetscErrorCode ierr;
1124   PetscMPIInt    size;
1125 
1126   PetscFunctionBegin;
1127   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1128   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1129   if (size == 1) {
1130     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1131   } else {
1132     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 EXTERN_C_END
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1140 /*@C
1141    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1142 
1143    Not collective
1144 
1145    Input Parameters:
1146 +  A - the matrix
1147 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1148    to control all matrix memory allocation.
1149 
1150    Notes:
1151    The dense format is fully compatible with standard Fortran 77
1152    storage by columns.
1153 
1154    The data input variable is intended primarily for Fortran programmers
1155    who wish to allocate their own matrix memory space.  Most users should
1156    set data=PETSC_NULL.
1157 
1158    Level: intermediate
1159 
1160 .keywords: matrix,dense, parallel
1161 
1162 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1163 @*/
1164 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1165 {
1166   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1167 
1168   PetscFunctionBegin;
1169   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1170   if (f) {
1171     ierr = (*f)(mat,data);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
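
/*
   Illustrative usage sketch (not part of the original source; comm, m and N are assumed
   to be defined by the caller, and mydata is a caller-allocated array of length m*N
   holding the local block stored by columns): supplying user-managed storage instead of
   letting PETSc allocate it.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/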
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatCreateMPIDense"
1178 /*@C
1179    MatCreateMPIDense - Creates a parallel matrix in dense format.
1180 
1181    Collective on MPI_Comm
1182 
1183    Input Parameters:
1184 +  comm - MPI communicator
1185 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1186 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1187 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1188 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1189 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1190    to control all matrix memory allocation.
1191 
1192    Output Parameter:
1193 .  A - the matrix
1194 
1195    Notes:
1196    The dense format is fully compatible with standard Fortran 77
1197    storage by columns.
1198 
1199    The data input variable is intended primarily for Fortran programmers
1200    who wish to allocate their own matrix memory space.  Most users should
1201    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1202 
1203    The user MUST specify either the local or global matrix dimensions
1204    (possibly both).
1205 
1206    Level: intermediate
1207 
1208 .keywords: matrix,dense, parallel
1209 
1210 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1211 @*/
1212 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1213 {
1214   PetscErrorCode ierr;
1215   PetscMPIInt    size;
1216 
1217   PetscFunctionBegin;
1218   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1219   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1220   if (size > 1) {
1221     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1222     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1223   } else {
1224     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1225     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1226   }
1227   PetscFunctionReturn(0);
1228 }
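
/*
   Illustrative usage sketch (not part of the original source; M, N and the inserted
   value are placeholders): creating the matrix with PETSc-managed storage and filling
   it with the usual set-values/assembly sequence.

     Mat            A;
     PetscInt       i = 0,j = 0;
     PetscScalar    one = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&i,1,&j,&one,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/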
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatDuplicate_MPIDense"
1232 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1233 {
1234   Mat          mat;
1235   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   *newmat       = 0;
1240   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1241   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1242   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1243   mat->data         = (void*)a;
1244   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1245   mat->factor       = A->factor;
1246   mat->assembled    = PETSC_TRUE;
1247   mat->preallocated = PETSC_TRUE;
1248 
1249   a->rstart       = oldmat->rstart;
1250   a->rend         = oldmat->rend;
1251   a->size         = oldmat->size;
1252   a->rank         = oldmat->rank;
1253   mat->insertmode = NOT_SET_VALUES;
1254   a->nvec         = oldmat->nvec;
1255   a->donotstash   = oldmat->donotstash;
1256   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1257   ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1258   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1259   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1260 
1261   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1262   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1263   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1264   *newmat = mat;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #include "petscsys.h"
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1272 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
1273 {
1274   PetscErrorCode ierr;
1275   PetscMPIInt    rank,size;
1276   PetscInt            *rowners,i,m,nz,j;
1277   PetscScalar    *array,*vals,*vals_ptr;
1278   MPI_Status     status;
1279 
1280   PetscFunctionBegin;
1281   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1282   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1283 
1284   /* determine ownership of all rows */
1285   m          = M/size + ((M % size) > rank);
1286   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1287   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1288   rowners[0] = 0;
1289   for (i=2; i<=size; i++) {
1290     rowners[i] += rowners[i-1];
1291   }
1292 
1293   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1294   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1295   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1296   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1297 
1298   if (!rank) {
1299     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1300 
1301     /* read in my part of the matrix numerical values  */
1302     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1303 
1304     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1305     vals_ptr = vals;
1306     for (i=0; i<m; i++) {
1307       for (j=0; j<N; j++) {
1308         array[i + j*m] = *vals_ptr++;
1309       }
1310     }
1311 
1312     /* read in the other processors' parts and ship them out */
1313     for (i=1; i<size; i++) {
1314       nz   = (rowners[i+1] - rowners[i])*N;
1315       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1316       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1317     }
1318   } else {
1319     /* receive numeric values */
1320     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1321 
1322     /* receive message of values*/
1323     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1324 
1325     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1326     vals_ptr = vals;
1327     for (i=0; i<m; i++) {
1328       for (j=0; j<N; j++) {
1329         array[i + j*m] = *vals_ptr++;
1330       }
1331     }
1332   }
1333   ierr = PetscFree(rowners);CHKERRQ(ierr);
1334   ierr = PetscFree(vals);CHKERRQ(ierr);
1335   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1336   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatLoad_MPIDense"
1342 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1343 {
1344   Mat            A;
1345   PetscScalar    *vals,*svals;
1346   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1347   MPI_Status     status;
1348   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1349   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1350   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1351   PetscInt       i,nz,j,rstart,rend;
1352   int            fd;
1353   PetscErrorCode ierr;
1354 
1355   PetscFunctionBegin;
1356   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1357   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1358   if (!rank) {
1359     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1360     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1361     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
1362   }
1363 
1364   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1365   M = header[1]; N = header[2]; nz = header[3];
1366 
1367   /*
1368        Handle case where matrix is stored on disk as a dense matrix
1369   */
1370   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1371     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1372     PetscFunctionReturn(0);
1373   }
1374 
1375   /* determine ownership of all rows */
1376   m          = M/size + ((M % size) > rank);
1377   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1378   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1379   rowners[0] = 0;
1380   for (i=2; i<=size; i++) {
1381     rowners[i] += rowners[i-1];
1382   }
1383   rstart = rowners[rank];
1384   rend   = rowners[rank+1];
1385 
1386   /* distribute row lengths to all processors */
1387   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1388   offlens = ourlens + (rend-rstart);
1389   if (!rank) {
1390     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1391     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1392     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1393     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1394     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1395     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1396   } else {
1397     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1398   }
1399 
1400   if (!rank) {
1401     /* calculate the number of nonzeros on each processor */
1402     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1403     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1404     for (i=0; i<size; i++) {
1405       for (j=rowners[i]; j< rowners[i+1]; j++) {
1406         procsnz[i] += rowlengths[j];
1407       }
1408     }
1409     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1410 
1411     /* determine max buffer needed and allocate it */
1412     maxnz = 0;
1413     for (i=0; i<size; i++) {
1414       maxnz = PetscMax(maxnz,procsnz[i]);
1415     }
1416     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1417 
1418     /* read in my part of the matrix column indices  */
1419     nz = procsnz[0];
1420     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1421     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1422 
1423     /* read in everyone else's parts and ship them off */
1424     for (i=1; i<size; i++) {
1425       nz   = procsnz[i];
1426       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1427       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1428     }
1429     ierr = PetscFree(cols);CHKERRQ(ierr);
1430   } else {
1431     /* determine buffer space needed for message */
1432     nz = 0;
1433     for (i=0; i<m; i++) {
1434       nz += ourlens[i];
1435     }
1436     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1437 
1438     /* receive message of column indices*/
1439     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1440     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1441     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1442   }
1443 
1444   /* loop over local rows, determining number of off diagonal entries */
1445   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1446   jj = 0;
1447   for (i=0; i<m; i++) {
1448     for (j=0; j<ourlens[i]; j++) {
1449       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1450       jj++;
1451     }
1452   }
1453 
1454   /* create our matrix */
1455   for (i=0; i<m; i++) {
1456     ourlens[i] -= offlens[i];
1457   }
1458   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1459   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1460   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1461   A = *newmat;
1462   for (i=0; i<m; i++) {
1463     ourlens[i] += offlens[i];
1464   }
1465 
1466   if (!rank) {
1467     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1468 
1469     /* read in my part of the matrix numerical values  */
1470     nz = procsnz[0];
1471     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1472 
1473     /* insert into matrix */
1474     jj      = rstart;
1475     smycols = mycols;
1476     svals   = vals;
1477     for (i=0; i<m; i++) {
1478       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1479       smycols += ourlens[i];
1480       svals   += ourlens[i];
1481       jj++;
1482     }
1483 
1484     /* read in the other processors' parts and ship them out */
1485     for (i=1; i<size; i++) {
1486       nz   = procsnz[i];
1487       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1488       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1489     }
1490     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1491   } else {
1492     /* receive numeric values */
1493     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1494 
1495     /* receive message of values*/
1496     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1497     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1498     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1499 
1500     /* insert into matrix */
1501     jj      = rstart;
1502     smycols = mycols;
1503     svals   = vals;
1504     for (i=0; i<m; i++) {
1505       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1506       smycols += ourlens[i];
1507       svals   += ourlens[i];
1508       jj++;
1509     }
1510   }
1511   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1512   ierr = PetscFree(vals);CHKERRQ(ierr);
1513   ierr = PetscFree(mycols);CHKERRQ(ierr);
1514   ierr = PetscFree(rowners);CHKERRQ(ierr);
1515 
1516   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1517   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 
1522 
1523 
1524 
1525