xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ba8c8a5662b295e2893d0db495771477bd634898)
1 /*
2    Basic functions for basic parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetRow_MPIDense"
9 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
10 {
11   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
12   PetscErrorCode ierr;
13   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;
14 
15   PetscFunctionBegin;
16   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows currently supported");
17   lrow = row - rstart;
18   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
19   PetscFunctionReturn(0);
20 }
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatRestoreRow_MPIDense"
24 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
25 {
26   PetscErrorCode ierr;
27 
28   PetscFunctionBegin;
29   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
30   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
31   PetscFunctionReturn(0);
32 }
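
/*
   A minimal usage sketch (assuming an assembled MPIDENSE matrix A): MatGetRow()
   supports only locally owned rows here, so a caller restricts itself to the
   ownership range.  Pointer constness of cols/vals may differ between PETSc versions.

     PetscInt          row,rstart,rend,nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... use the nz entries of this row ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/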
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
37 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
38 {
39   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
40   PetscErrorCode ierr;
41   PetscInt          m = A->m,rstart = mdn->rstart;
42   PetscScalar  *array;
43   MPI_Comm     comm;
44 
45   PetscFunctionBegin;
46   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
47 
48   /* The reuse aspect is not implemented efficiently */
49   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
50 
51   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
52   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
53   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
54   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
55   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
56   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
57   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
58   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
59 
60   *iscopy = PETSC_TRUE;
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatSetValues_MPIDense"
67 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
68 {
69   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
70   PetscErrorCode ierr;
71   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
72   PetscTruth   roworiented = A->roworiented;
73 
74   PetscFunctionBegin;
75   for (i=0; i<m; i++) {
76     if (idxm[i] < 0) continue;
77     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
78     if (idxm[i] >= rstart && idxm[i] < rend) {
79       row = idxm[i] - rstart;
80       if (roworiented) {
81         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
82       } else {
83         for (j=0; j<n; j++) {
84           if (idxn[j] < 0) continue;
85           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
86           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
87         }
88       }
89     } else {
90       if (!A->donotstash) {
91         if (roworiented) {
92           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
93         } else {
94           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
95         }
96       }
97     }
98   }
99   PetscFunctionReturn(0);
100 }
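
/*
   A minimal usage sketch (assuming a preallocated MPIDENSE matrix A and global
   indices i,j with value v): entries may be set for any global row; rows owned
   by another process are stashed above and communicated during assembly.

     ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ... further MatSetValues() calls, possibly for off-process rows ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/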
101 
102 #undef __FUNCT__
103 #define __FUNCT__ "MatGetValues_MPIDense"
104 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
105 {
106   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
107   PetscErrorCode ierr;
108   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
109 
110   PetscFunctionBegin;
111   for (i=0; i<m; i++) {
112     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
113     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
114     if (idxm[i] >= rstart && idxm[i] < rend) {
115       row = idxm[i] - rstart;
116       for (j=0; j<n; j++) {
117         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
118         if (idxn[j] >= mat->N) {
119           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120         }
121         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
122       }
123     } else {
124       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
125     }
126   }
127   PetscFunctionReturn(0);
128 }
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatGetArray_MPIDense"
132 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
133 {
134   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
135   PetscErrorCode ierr;
136 
137   PetscFunctionBegin;
138   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
144 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
145 {
146   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
147   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
148   PetscErrorCode ierr;
149   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
150   PetscScalar  *av,*bv,*v = lmat->v;
151   Mat          newmat;
152 
153   PetscFunctionBegin;
154   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
155   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
156   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
157   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
158 
159   /* No parallel redistribution is currently supported! We should really check each index set
160      to confirm that it is OK.  Currently only a submatrix with the same partitioning as the
161      original matrix is supported! */
162 
163   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
164   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
165 
166   /* Check submatrix call */
167   if (scall == MAT_REUSE_MATRIX) {
168     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
169     /* Really need to test rows and column sizes! */
170     newmat = *B;
171   } else {
172     /* Create and fill new matrix */
173     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
174     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
175     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
176   }
177 
178   /* Now extract the data pointers and do the copy, column at a time */
179   newmatd = (Mat_MPIDense*)newmat->data;
180   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
181 
182   for (i=0; i<ncols; i++) {
183     av = v + nlrows*icol[i];
184     for (j=0; j<nrows; j++) {
185       *bv++ = av[irow[j] - rstart];
186     }
187   }
188 
189   /* Assemble the matrices so that the correct flags are set */
190   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192 
193   /* Free work space */
194   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
196   *B = newmat;
197   PetscFunctionReturn(0);
198 }
199 
200 #undef __FUNCT__
201 #define __FUNCT__ "MatRestoreArray_MPIDense"
202 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
203 {
204   PetscFunctionBegin;
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
210 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
211 {
212   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
213   MPI_Comm     comm = mat->comm;
214   PetscErrorCode ierr;
215   PetscInt          nstash,reallocs;
216   InsertMode   addv;
217 
218   PetscFunctionBegin;
219   /* make sure all processors are either in INSERTMODE or ADDMODE */
220   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
221   if (addv == (ADD_VALUES|INSERT_VALUES)) {
222     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
223   }
224   mat->insertmode = addv; /* in case this processor had no cache */
225 
226   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
227   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
228   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
229   PetscFunctionReturn(0);
230 }
231 
232 #undef __FUNCT__
233 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
234 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
235 {
236   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
237   PetscErrorCode  ierr;
238   PetscInt        i,*row,*col,flg,j,rstart,ncols;
239   PetscMPIInt     n;
240   PetscScalar     *val;
241   InsertMode      addv=mat->insertmode;
242 
243   PetscFunctionBegin;
244   /*  wait on receives */
245   while (1) {
246     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
247     if (!flg) break;
248 
249     for (i=0; i<n;) {
250       /* Now identify the consecutive vals belonging to the same row */
251       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
252       if (j < n) ncols = j-i;
253       else       ncols = n-i;
254       /* Now assemble all these values with a single function call */
255       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
256       i = j;
257     }
258   }
259   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
260 
261   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
262   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
263 
264   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
265     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
266   }
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatZeroEntries_MPIDense"
272 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
273 {
274   PetscErrorCode ierr;
275   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
276 
277   PetscFunctionBegin;
278   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 /* The code does not handle the diagonal entries correctly unless the
283    matrix is square and the column and row ownerships are identical.
284    This is a BUG. The only way to fix it seems to be to access
285    mdn->A and mdn->B directly and not through the MatZeroRows()
286    routine.
287 */
288 #undef __FUNCT__
289 #define __FUNCT__ "MatZeroRows_MPIDense"
290 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
291 {
292   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
293   PetscErrorCode ierr;
294   PetscInt       i,N,*rows,*owners = l->rowners;
295   PetscInt       *nprocs,j,idx,nsends;
296   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
297   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
298   PetscInt       *lens,*lrows,*values;
299   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
300   MPI_Comm       comm = A->comm;
301   MPI_Request    *send_waits,*recv_waits;
302   MPI_Status     recv_status,*send_status;
303   IS             istmp;
304   PetscTruth     found;
305 
306   PetscFunctionBegin;
307   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
308   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
309 
310   /*  first count number of contributors to each processor */
311   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
312   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
313   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
314   for (i=0; i<N; i++) {
315     idx = rows[i];
316     found = PETSC_FALSE;
317     for (j=0; j<size; j++) {
318       if (idx >= owners[j] && idx < owners[j+1]) {
319         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
320       }
321     }
322     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
323   }
324   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
325 
326   /* inform other processors of number of messages and max length*/
327   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
328 
329   /* post receives:   */
330   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
331   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
332   for (i=0; i<nrecvs; i++) {
333     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
334   }
335 
336   /* do sends:
337       1) starts[i] gives the starting index in svalues for stuff going to
338          the ith processor
339   */
340   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
341   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
342   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
343   starts[0]  = 0;
344   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
345   for (i=0; i<N; i++) {
346     svalues[starts[owner[i]]++] = rows[i];
347   }
348   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
349 
350   starts[0] = 0;
351   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
352   count = 0;
353   for (i=0; i<size; i++) {
354     if (nprocs[2*i+1]) {
355       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
356     }
357   }
358   ierr = PetscFree(starts);CHKERRQ(ierr);
359 
360   base = owners[rank];
361 
362   /*  wait on receives */
363   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
364   source = lens + nrecvs;
365   count  = nrecvs; slen = 0;
366   while (count) {
367     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
368     /* unpack receives into our local space */
369     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
370     source[imdex]  = recv_status.MPI_SOURCE;
371     lens[imdex]    = n;
372     slen += n;
373     count--;
374   }
375   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
376 
377   /* move the data into the send scatter */
378   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
379   count = 0;
380   for (i=0; i<nrecvs; i++) {
381     values = rvalues + i*nmax;
382     for (j=0; j<lens[i]; j++) {
383       lrows[count++] = values[j] - base;
384     }
385   }
386   ierr = PetscFree(rvalues);CHKERRQ(ierr);
387   ierr = PetscFree(lens);CHKERRQ(ierr);
388   ierr = PetscFree(owner);CHKERRQ(ierr);
389   ierr = PetscFree(nprocs);CHKERRQ(ierr);
390 
391   /* actually zap the local rows */
392   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
393   PetscLogObjectParent(A,istmp);
394   ierr = PetscFree(lrows);CHKERRQ(ierr);
395   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
396   ierr = ISDestroy(istmp);CHKERRQ(ierr);
397 
398   /* wait on sends */
399   if (nsends) {
400     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
401     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
402     ierr = PetscFree(send_status);CHKERRQ(ierr);
403   }
404   ierr = PetscFree(send_waits);CHKERRQ(ierr);
405   ierr = PetscFree(svalues);CHKERRQ(ierr);
406 
407   PetscFunctionReturn(0);
408 }
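
/*
   A minimal usage sketch (assuming an assembled square MPIDENSE matrix A and an
   array rows[] of nrows global row indices): zero the rows and place 1.0 on the
   diagonal; pass PETSC_NULL instead of &diag to leave the zeroed rows empty.

     IS          is;
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nrows,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/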
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "MatMult_MPIDense"
412 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
413 {
414   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
419   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
420   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatMultAdd_MPIDense"
426 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
427 {
428   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
429   PetscErrorCode ierr;
430 
431   PetscFunctionBegin;
432   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
433   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
434   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "MatMultTranspose_MPIDense"
440 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
443   PetscErrorCode ierr;
444   PetscScalar  zero = 0.0;
445 
446   PetscFunctionBegin;
447   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
448   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
449   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
450   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
456 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
457 {
458   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
463   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
464   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
465   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatGetDiagonal_MPIDense"
471 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
472 {
473   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
474   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
475   PetscErrorCode ierr;
476   PetscInt          len,i,n,m = A->m,radd;
477   PetscScalar  *x,zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(&zero,v);CHKERRQ(ierr);
481   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
482   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
483   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
484   len  = PetscMin(a->A->m,a->A->n);
485   radd = a->rstart*m;
486   for (i=0; i<len; i++) {
487     x[i] = aloc->v[radd + i*m + i];
488   }
489   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatDestroy_MPIDense"
495 PetscErrorCode MatDestroy_MPIDense(Mat mat)
496 {
497   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498   PetscErrorCode ierr;
499 
500   PetscFunctionBegin;
501 
502 #if defined(PETSC_USE_LOG)
503   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
504 #endif
505   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
506   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
507   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
508   if (mdn->lvec)   VecDestroy(mdn->lvec);
509   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
510   if (mdn->factor) {
511     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
512     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
513     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
514     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
515   }
516   ierr = PetscFree(mdn);CHKERRQ(ierr);
517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatView_MPIDense_Binary"
524 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
525 {
526   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
527   PetscErrorCode ierr;
528 
529   PetscFunctionBegin;
530   if (mdn->size == 1) {
531     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
532   }
533   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
539 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
540 {
541   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
542   PetscErrorCode    ierr;
543   PetscMPIInt       size = mdn->size,rank = mdn->rank;
544   PetscViewerType   vtype;
545   PetscTruth        iascii,isdraw;
546   PetscViewer       sviewer;
547   PetscViewerFormat format;
548 
549   PetscFunctionBegin;
550   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
551   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
552   if (iascii) {
553     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
554     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
555     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
556       MatInfo info;
557       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
558       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
559                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
560       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
561       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
562       PetscFunctionReturn(0);
563     } else if (format == PETSC_VIEWER_ASCII_INFO) {
564       PetscFunctionReturn(0);
565     }
566   } else if (isdraw) {
567     PetscDraw       draw;
568     PetscTruth isnull;
569 
570     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
571     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
572     if (isnull) PetscFunctionReturn(0);
573   }
574 
575   if (size == 1) {
576     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
577   } else {
578     /* assemble the entire matrix onto first processor. */
579     Mat         A;
580     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
581     PetscInt    *cols;
582     PetscScalar *vals;
583 
584     if (!rank) {
585       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
586     } else {
587       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
588     }
589     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
590     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
591     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
592     PetscLogObjectParent(mat,A);
593 
594     /* Copy the matrix ... This isn't the most efficient means,
595        but it's quick for now */
596     row = mdn->rstart; m = mdn->A->m;
597     for (i=0; i<m; i++) {
598       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
599       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
600       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
601       row++;
602     }
603 
604     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
605     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
606     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
607     if (!rank) {
608       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
609     }
610     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
611     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
612     ierr = MatDestroy(A);CHKERRQ(ierr);
613   }
614   PetscFunctionReturn(0);
615 }
616 
617 #undef __FUNCT__
618 #define __FUNCT__ "MatView_MPIDense"
619 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
620 {
621   PetscErrorCode ierr;
622   PetscTruth iascii,isbinary,isdraw,issocket;
623 
624   PetscFunctionBegin;
625 
626   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
627   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
628   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
629   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
630 
631   if (iascii || issocket || isdraw) {
632     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
633   } else if (isbinary) {
634     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
635   } else {
636     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 #undef __FUNCT__
642 #define __FUNCT__ "MatGetInfo_MPIDense"
643 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
644 {
645   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
646   Mat          mdn = mat->A;
647   PetscErrorCode ierr;
648   PetscReal    isend[5],irecv[5];
649 
650   PetscFunctionBegin;
651   info->rows_global    = (double)A->M;
652   info->columns_global = (double)A->N;
653   info->rows_local     = (double)A->m;
654   info->columns_local  = (double)A->N;
655   info->block_size     = 1.0;
656   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
657   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
658   isend[3] = info->memory;  isend[4] = info->mallocs;
659   if (flag == MAT_LOCAL) {
660     info->nz_used      = isend[0];
661     info->nz_allocated = isend[1];
662     info->nz_unneeded  = isend[2];
663     info->memory       = isend[3];
664     info->mallocs      = isend[4];
665   } else if (flag == MAT_GLOBAL_MAX) {
666     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
667     info->nz_used      = irecv[0];
668     info->nz_allocated = irecv[1];
669     info->nz_unneeded  = irecv[2];
670     info->memory       = irecv[3];
671     info->mallocs      = irecv[4];
672   } else if (flag == MAT_GLOBAL_SUM) {
673     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
674     info->nz_used      = irecv[0];
675     info->nz_allocated = irecv[1];
676     info->nz_unneeded  = irecv[2];
677     info->memory       = irecv[3];
678     info->mallocs      = irecv[4];
679   }
680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
681   info->fill_ratio_needed = 0;
682   info->factor_mallocs    = 0;
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSetOption_MPIDense"
688 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
689 {
690   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
691   PetscErrorCode ierr;
692 
693   PetscFunctionBegin;
694   switch (op) {
695   case MAT_NO_NEW_NONZERO_LOCATIONS:
696   case MAT_YES_NEW_NONZERO_LOCATIONS:
697   case MAT_NEW_NONZERO_LOCATION_ERR:
698   case MAT_NEW_NONZERO_ALLOCATION_ERR:
699   case MAT_COLUMNS_SORTED:
700   case MAT_COLUMNS_UNSORTED:
701     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
702     break;
703   case MAT_ROW_ORIENTED:
704     a->roworiented = PETSC_TRUE;
705     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
706     break;
707   case MAT_ROWS_SORTED:
708   case MAT_ROWS_UNSORTED:
709   case MAT_YES_NEW_DIAGONALS:
710   case MAT_USE_HASH_TABLE:
711     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
712     break;
713   case MAT_COLUMN_ORIENTED:
714     a->roworiented = PETSC_FALSE;
715     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
716     break;
717   case MAT_IGNORE_OFF_PROC_ENTRIES:
718     a->donotstash = PETSC_TRUE;
719     break;
720   case MAT_NO_NEW_DIAGONALS:
721     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
722   case MAT_SYMMETRIC:
723   case MAT_STRUCTURALLY_SYMMETRIC:
724   case MAT_NOT_SYMMETRIC:
725   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
726   case MAT_HERMITIAN:
727   case MAT_NOT_HERMITIAN:
728   case MAT_SYMMETRY_ETERNAL:
729   case MAT_NOT_SYMMETRY_ETERNAL:
730     break;
731   default:
732     SETERRQ(PETSC_ERR_SUP,"unknown option");
733   }
734   PetscFunctionReturn(0);
735 }
736 
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatDiagonalScale_MPIDense"
740 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
741 {
742   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
743   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
744   PetscScalar  *l,*r,x,*v;
745   PetscErrorCode ierr;
746   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
747 
748   PetscFunctionBegin;
749   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
750   if (ll) {
751     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
752     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
753     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
754     for (i=0; i<m; i++) {
755       x = l[i];
756       v = mat->v + i;
757       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
758     }
759     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
760     PetscLogFlops(n*m);
761   }
762   if (rr) {
763     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
764     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
765     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
766     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
767     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
768     for (i=0; i<n; i++) {
769       x = r[i];
770       v = mat->v + i*m;
771       for (j=0; j<m; j++) { (*v++) *= x;}
772     }
773     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
774     PetscLogFlops(n*m);
775   }
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatNorm_MPIDense"
781 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
782 {
783   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
784   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
785   PetscErrorCode ierr;
786   PetscInt          i,j;
787   PetscReal    sum = 0.0;
788   PetscScalar  *v = mat->v;
789 
790   PetscFunctionBegin;
791   if (mdn->size == 1) {
792     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
793   } else {
794     if (type == NORM_FROBENIUS) {
795       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
796 #if defined(PETSC_USE_COMPLEX)
797         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
798 #else
799         sum += (*v)*(*v); v++;
800 #endif
801       }
802       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
803       *nrm = sqrt(*nrm);
804       PetscLogFlops(2*mdn->A->n*mdn->A->m);
805     } else if (type == NORM_1) {
806       PetscReal *tmp,*tmp2;
807       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
808       tmp2 = tmp + A->N;
809       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
810       *nrm = 0.0;
811       v = mat->v;
812       for (j=0; j<mdn->A->n; j++) {
813         for (i=0; i<mdn->A->m; i++) {
814           tmp[j] += PetscAbsScalar(*v);  v++;
815         }
816       }
817       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
818       for (j=0; j<A->N; j++) {
819         if (tmp2[j] > *nrm) *nrm = tmp2[j];
820       }
821       ierr = PetscFree(tmp);CHKERRQ(ierr);
822       PetscLogFlops(A->n*A->m);
823     } else if (type == NORM_INFINITY) { /* max row norm */
824       PetscReal ntemp;
825       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
826       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
827     } else {
828       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
829     }
830   }
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatTranspose_MPIDense"
836 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
837 {
838   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
839   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
840   Mat          B;
841   PetscInt          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
842   PetscErrorCode ierr;
843   PetscInt          j,i;
844   PetscScalar  *v;
845 
846   PetscFunctionBegin;
847   if (!matout && M != N) {
848     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
849   }
850   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
851   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
852   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
853 
854   m = a->A->m; n = a->A->n; v = Aloc->v;
855   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
856   for (i=0; i<m; i++) rwork[i] = rstart + i;  /* column indices in B are the global row indices of A */
857   for (j=0; j<n; j++) {
858     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
859     v   += m;
860   }
861   ierr = PetscFree(rwork);CHKERRQ(ierr);
862   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
863   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
864   if (matout) {
865     *matout = B;
866   } else {
867     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
868   }
869   PetscFunctionReturn(0);
870 }
871 
872 #include "petscblaslapack.h"
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIDense"
875 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
876 {
877   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
878   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
879   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
880 
881   PetscFunctionBegin;
882   BLscal_(&nz,(PetscScalar*)alpha,a->v,&one);
883   PetscLogFlops(nz);
884   PetscFunctionReturn(0);
885 }
886 
887 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
891 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
892 {
893   PetscErrorCode ierr;
894 
895   PetscFunctionBegin;
896   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
899 
900 /* -------------------------------------------------------------------*/
901 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
902        MatGetRow_MPIDense,
903        MatRestoreRow_MPIDense,
904        MatMult_MPIDense,
905 /* 4*/ MatMultAdd_MPIDense,
906        MatMultTranspose_MPIDense,
907        MatMultTransposeAdd_MPIDense,
908        0,
909        0,
910        0,
911 /*10*/ 0,
912        0,
913        0,
914        0,
915        MatTranspose_MPIDense,
916 /*15*/ MatGetInfo_MPIDense,
917        0,
918        MatGetDiagonal_MPIDense,
919        MatDiagonalScale_MPIDense,
920        MatNorm_MPIDense,
921 /*20*/ MatAssemblyBegin_MPIDense,
922        MatAssemblyEnd_MPIDense,
923        0,
924        MatSetOption_MPIDense,
925        MatZeroEntries_MPIDense,
926 /*25*/ MatZeroRows_MPIDense,
927        0,
928        0,
929        0,
930        0,
931 /*30*/ MatSetUpPreallocation_MPIDense,
932        0,
933        0,
934        MatGetArray_MPIDense,
935        MatRestoreArray_MPIDense,
936 /*35*/ MatDuplicate_MPIDense,
937        0,
938        0,
939        0,
940        0,
941 /*40*/ 0,
942        MatGetSubMatrices_MPIDense,
943        0,
944        MatGetValues_MPIDense,
945        0,
946 /*45*/ 0,
947        MatScale_MPIDense,
948        0,
949        0,
950        0,
951 /*50*/ 0,
952        0,
953        0,
954        0,
955        0,
956 /*55*/ 0,
957        0,
958        0,
959        0,
960        0,
961 /*60*/ MatGetSubMatrix_MPIDense,
962        MatDestroy_MPIDense,
963        MatView_MPIDense,
964        MatGetPetscMaps_Petsc,
965        0,
966 /*65*/ 0,
967        0,
968        0,
969        0,
970        0,
971 /*70*/ 0,
972        0,
973        0,
974        0,
975        0,
976 /*75*/ 0,
977        0,
978        0,
979        0,
980        0,
981 /*80*/ 0,
982        0,
983        0,
984        0,
985 /*84*/ MatLoad_MPIDense,
986        0,
987        0,
988        0,
989        0,
990        0,
991 /*90*/ 0,
992        0,
993        0,
994        0,
995        0,
996 /*95*/ 0,
997        0,
998        0,
999        0};
1000 
1001 EXTERN_C_BEGIN
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1004 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1005 {
1006   Mat_MPIDense *a;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   mat->preallocated = PETSC_TRUE;
1011   /* Note:  For now, when data is specified above, this assumes the user correctly
1012    allocates the local dense storage space.  We should add error checking. */
1013 
1014   a    = (Mat_MPIDense*)mat->data;
1015   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1016   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1017   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1018   PetscLogObjectParent(mat,a->A);
1019   PetscFunctionReturn(0);
1020 }
1021 EXTERN_C_END
1022 
1023 /*MC
1024    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1025 
1026    Options Database Keys:
1027 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1028 
1029   Level: beginner
1030 
1031 .seealso: MatCreateMPIDense
1032 M*/
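
/*
   A minimal usage sketch (assuming global dimensions M and N): creating an
   "mpidense" matrix explicitly and letting PETSc allocate the local storage.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/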
1033 
1034 EXTERN_C_BEGIN
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatCreate_MPIDense"
1037 PetscErrorCode MatCreate_MPIDense(Mat mat)
1038 {
1039   Mat_MPIDense *a;
1040   PetscErrorCode ierr;
1041   PetscInt          i;
1042 
1043   PetscFunctionBegin;
1044   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1045   mat->data         = (void*)a;
1046   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1047   mat->factor       = 0;
1048   mat->mapping      = 0;
1049 
1050   a->factor       = 0;
1051   mat->insertmode = NOT_SET_VALUES;
1052   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1053   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1054 
1055   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1056   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1057   a->nvec = mat->n;
1058 
1059   /* the information in the maps duplicates the information computed below, eventually
1060      we should remove the duplicate information that is not contained in the maps */
1061   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1062   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1063 
1064   /* build local table of row and column ownerships */
1065   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1066   a->cowners = a->rowners + a->size + 1;
1067   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1068   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1069   a->rowners[0] = 0;
1070   for (i=2; i<=a->size; i++) {
1071     a->rowners[i] += a->rowners[i-1];
1072   }
1073   a->rstart = a->rowners[a->rank];
1074   a->rend   = a->rowners[a->rank+1];
1075   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1076   a->cowners[0] = 0;
1077   for (i=2; i<=a->size; i++) {
1078     a->cowners[i] += a->cowners[i-1];
1079   }
1080 
1081   /* build cache for off array entries formed */
1082   a->donotstash = PETSC_FALSE;
1083   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1084 
1085   /* stuff used for matrix vector multiply */
1086   a->lvec        = 0;
1087   a->Mvctx       = 0;
1088   a->roworiented = PETSC_TRUE;
1089 
1090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1091                                      "MatGetDiagonalBlock_MPIDense",
1092                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1094                                      "MatMPIDenseSetPreallocation_MPIDense",
1095                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 EXTERN_C_END
1099 
1100 /*MC
1101    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1102 
1103    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1104    and MATMPIDENSE otherwise.
1105 
1106    Options Database Keys:
1107 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1108 
1109   Level: beginner
1110 
1111 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1112 M*/
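
/*
   A minimal usage sketch (assuming global dimensions M and N): selecting the type
   at runtime; with -mat_type dense this resolves to MATSEQDENSE on one process
   and MATMPIDENSE otherwise.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/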
1113 
1114 EXTERN_C_BEGIN
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatCreate_Dense"
1117 PetscErrorCode MatCreate_Dense(Mat A)
1118 {
1119   PetscErrorCode ierr;
1120   PetscMPIInt    size;
1121 
1122   PetscFunctionBegin;
1123   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1124   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1125   if (size == 1) {
1126     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1127   } else {
1128     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 EXTERN_C_END
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1136 /*@C
1137    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1138 
1139    Not collective
1140 
1141    Input Parameters:
1142 +  A - the matrix
1143 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1144    to control all matrix memory allocation.
1145 
1146    Notes:
1147    The dense format is fully compatible with standard Fortran 77
1148    storage by columns.
1149 
1150    The data input variable is intended primarily for Fortran programmers
1151    who wish to allocate their own matrix memory space.  Most users should
1152    set data=PETSC_NULL.
1153 
1154    Level: intermediate
1155 
1156 .keywords: matrix,dense, parallel
1157 
1158 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1159 @*/
1160 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1161 {
1162   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1163 
1164   PetscFunctionBegin;
1165   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1166   if (f) {
1167     ierr = (*f)(mat,data);CHKERRQ(ierr);
1168   }
1169   PetscFunctionReturn(0);
1170 }
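
/*
   A minimal usage sketch of the user-supplied-storage path (larray is a hypothetical
   caller-owned buffer holding the local m x N block stored by columns):

     ierr = MatCreate(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DECIDE,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,larray);CHKERRQ(ierr);

   PETSc does not take over a user-supplied array; the caller remains responsible
   for its lifetime.
*/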
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatCreateMPIDense"
1174 /*@C
1175    MatCreateMPIDense - Creates a parallel matrix in dense format.
1176 
1177    Collective on MPI_Comm
1178 
1179    Input Parameters:
1180 +  comm - MPI communicator
1181 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1182 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1183 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1184 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1185 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1186    to control all matrix memory allocation.
1187 
1188    Output Parameter:
1189 .  A - the matrix
1190 
1191    Notes:
1192    The dense format is fully compatible with standard Fortran 77
1193    storage by columns.
1194 
1195    The data input variable is intended primarily for Fortran programmers
1196    who wish to allocate their own matrix memory space.  Most users should
1197    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1198 
1199    The user MUST specify either the local or global matrix dimensions
1200    (possibly both).
1201 
1202    Level: intermediate
1203 
1204 .keywords: matrix,dense, parallel
1205 
1206 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1207 @*/
1208 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1209 {
1210   PetscErrorCode ierr;
1211   PetscMPIInt    size;
1212 
1213   PetscFunctionBegin;
1214   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1216   if (size > 1) {
1217     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1218     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1219   } else {
1220     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1221     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1222   }
1223   PetscFunctionReturn(0);
1224 }
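
/*
   A minimal usage sketch (assuming global dimensions M and N): the common case
   passes PETSC_NULL for data so that PETSc manages the storage.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/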
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatDuplicate_MPIDense"
1228 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1229 {
1230   Mat          mat;
1231   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   *newmat       = 0;
1236   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1237   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1238   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1239   mat->data         = (void*)a;
1240   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1241   mat->factor       = A->factor;
1242   mat->assembled    = PETSC_TRUE;
1243   mat->preallocated = PETSC_TRUE;
1244 
1245   a->rstart       = oldmat->rstart;
1246   a->rend         = oldmat->rend;
1247   a->size         = oldmat->size;
1248   a->rank         = oldmat->rank;
1249   mat->insertmode = NOT_SET_VALUES;
1250   a->nvec         = oldmat->nvec;
1251   a->donotstash   = oldmat->donotstash;
1252   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1253   PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1254   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1255   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1256 
1257   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1258   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1259   PetscLogObjectParent(mat,a->A);
1260   *newmat = mat;
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #include "petscsys.h"
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1268 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
1269 {
1270   PetscErrorCode ierr;
1271   PetscMPIInt    rank,size;
1272   PetscInt            *rowners,i,m,nz,j;
1273   PetscScalar    *array,*vals,*vals_ptr;
1274   MPI_Status     status;
1275 
1276   PetscFunctionBegin;
1277   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1279 
1280   /* determine ownership of all rows */
1281   m          = M/size + ((M % size) > rank);
1282   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1283   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1284   rowners[0] = 0;
1285   for (i=2; i<=size; i++) {
1286     rowners[i] += rowners[i-1];
1287   }
1288 
1289   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1290   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1291   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1292   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1293 
1294   if (!rank) {
1295     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1296 
1297     /* read in my part of the matrix numerical values  */
1298     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1299 
1300     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1301     vals_ptr = vals;
1302     for (i=0; i<m; i++) {
1303       for (j=0; j<N; j++) {
1304         array[i + j*m] = *vals_ptr++;
1305       }
1306     }
1307 
1308     /* read in other processors and ship out */
1309     for (i=1; i<size; i++) {
1310       nz   = (rowners[i+1] - rowners[i])*N;
1311       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1312       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1313     }
1314   } else {
1315     /* receive numeric values */
1316     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1317 
1318     /* receive message of values*/
1319     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1320 
1321     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1322     vals_ptr = vals;
1323     for (i=0; i<m; i++) {
1324       for (j=0; j<N; j++) {
1325         array[i + j*m] = *vals_ptr++;
1326       }
1327     }
1328   }
1329   ierr = PetscFree(rowners);CHKERRQ(ierr);
1330   ierr = PetscFree(vals);CHKERRQ(ierr);
1331   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1332   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatLoad_MPIDense"
1338 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1339 {
1340   Mat            A;
1341   PetscScalar    *vals,*svals;
1342   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1343   MPI_Status     status;
1344   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1345   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1346   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1347   PetscInt       i,nz,j,rstart,rend;
1348   int            fd;
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1353   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1354   if (!rank) {
1355     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1356     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1357     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1358   }
1359 
1360   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1361   M = header[1]; N = header[2]; nz = header[3];
1362 
1363   /*
1364        Handle case where matrix is stored on disk as a dense matrix
1365   */
1366   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1367     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1368     PetscFunctionReturn(0);
1369   }
1370 
1371   /* determine ownership of all rows */
1372   m          = M/size + ((M % size) > rank);
1373   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1374   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1375   rowners[0] = 0;
1376   for (i=2; i<=size; i++) {
1377     rowners[i] += rowners[i-1];
1378   }
1379   rstart = rowners[rank];
1380   rend   = rowners[rank+1];
1381 
1382   /* distribute row lengths to all processors */
1383   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1384   offlens = ourlens + (rend-rstart);
1385   if (!rank) {
1386     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1387     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1388     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1389     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1390     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1391     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1392   } else {
1393     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1394   }
1395 
1396   if (!rank) {
1397     /* calculate the number of nonzeros on each processor */
1398     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1399     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1400     for (i=0; i<size; i++) {
1401       for (j=rowners[i]; j< rowners[i+1]; j++) {
1402         procsnz[i] += rowlengths[j];
1403       }
1404     }
1405     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1406 
1407     /* determine max buffer needed and allocate it */
1408     maxnz = 0;
1409     for (i=0; i<size; i++) {
1410       maxnz = PetscMax(maxnz,procsnz[i]);
1411     }
1412     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1413 
1414     /* read in my part of the matrix column indices  */
1415     nz = procsnz[0];
1416     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1417     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1418 
1419     /* read in everyone else's part and ship it off */
1420     for (i=1; i<size; i++) {
1421       nz   = procsnz[i];
1422       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1423       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1424     }
1425     ierr = PetscFree(cols);CHKERRQ(ierr);
1426   } else {
1427     /* determine buffer space needed for message */
1428     nz = 0;
1429     for (i=0; i<m; i++) {
1430       nz += ourlens[i];
1431     }
1432     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1433 
1434     /* receive message of column indices*/
1435     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1436     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1437     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1438   }
1439 
1440   /* loop over local rows, determining number of off diagonal entries */
1441   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1442   jj = 0;
1443   for (i=0; i<m; i++) {
1444     for (j=0; j<ourlens[i]; j++) {
1445       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1446       jj++;
1447     }
1448   }
1449 
1450   /* create our matrix */
1451   for (i=0; i<m; i++) {
1452     ourlens[i] -= offlens[i];
1453   }
1454   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1455   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1456   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1457   A = *newmat;
1458   for (i=0; i<m; i++) {
1459     ourlens[i] += offlens[i];
1460   }
1461 
1462   if (!rank) {
1463     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1464 
1465     /* read in my part of the matrix numerical values  */
1466     nz = procsnz[0];
1467     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1468 
1469     /* insert into matrix */
1470     jj      = rstart;
1471     smycols = mycols;
1472     svals   = vals;
1473     for (i=0; i<m; i++) {
1474       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1475       smycols += ourlens[i];
1476       svals   += ourlens[i];
1477       jj++;
1478     }
1479 
1480     /* read in other processors and ship out */
1481     for (i=1; i<size; i++) {
1482       nz   = procsnz[i];
1483       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1484       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1485     }
1486     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1487   } else {
1488     /* receive numeric values */
1489     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1490 
1491     /* receive message of values*/
1492     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1493     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1494     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1495 
1496     /* insert into matrix */
1497     jj      = rstart;
1498     smycols = mycols;
1499     svals   = vals;
1500     for (i=0; i<m; i++) {
1501       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1502       smycols += ourlens[i];
1503       svals   += ourlens[i];
1504       jj++;
1505     }
1506   }
1507   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1508   ierr = PetscFree(vals);CHKERRQ(ierr);
1509   ierr = PetscFree(mycols);CHKERRQ(ierr);
1510   ierr = PetscFree(rowners);CHKERRQ(ierr);
1511 
1512   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1513   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
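
/*
   A minimal usage sketch (assuming a binary PetscViewer fview already opened for
   reading, e.g. with PetscViewerBinaryOpen()): load the matrix as MATMPIDENSE.

     Mat B;
     ierr = MatLoad(fview,MATMPIDENSE,&B);CHKERRQ(ierr);
*/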
1516 
1517 
1518 
1519 
1520 
1521