xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 0c295ffd55d452b856784eb664abbb23c6d3c576)
1 /*
2    Basic functions for parallel dense matrices.
3 */
4 
5 #include "src/mat/impls/dense/mpi/mpidense.h"
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatGetRow_MPIDense"
9 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
10 {
11   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
12   PetscErrorCode ierr;
13   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;
14 
15   PetscFunctionBegin;
16   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
17   lrow = row - rstart;
18   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
19   PetscFunctionReturn(0);
20 }
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "MatRestoreRow_MPIDense"
24 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
25 {
26   PetscErrorCode ierr;
27 
28   PetscFunctionBegin;
29   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
30   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
31   PetscFunctionReturn(0);
32 }
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
37 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
38 {
39   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
40   PetscErrorCode ierr;
41   PetscInt          m = A->m,rstart = mdn->rstart;
42   PetscScalar  *array;
43   MPI_Comm     comm;
44 
45   PetscFunctionBegin;
46   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
47 
48   /* The reuse aspect is not implemented efficiently */
49   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
50 
51   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
52   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
53   ierr = MatCreate(comm,m,m,m,m,B);CHKERRQ(ierr);
54   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
55   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
56   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
57   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
58   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
59 
60   *iscopy = PETSC_TRUE;
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatSetValues_MPIDense"
67 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
68 {
69   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
70   PetscErrorCode ierr;
71   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
72   PetscTruth   roworiented = A->roworiented;
73 
74   PetscFunctionBegin;
75   for (i=0; i<m; i++) {
76     if (idxm[i] < 0) continue;
77     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
78     if (idxm[i] >= rstart && idxm[i] < rend) {
79       row = idxm[i] - rstart;
80       if (roworiented) {
81         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
82       } else {
83         for (j=0; j<n; j++) {
84           if (idxn[j] < 0) continue;
85           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
86           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
87         }
88       }
89     } else {
90       if (!A->donotstash) {
91         if (roworiented) {
92           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
93         } else {
94           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
95         }
96       }
97     }
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 #undef __FUNCT__
103 #define __FUNCT__ "MatGetValues_MPIDense"
104 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
105 {
106   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
107   PetscErrorCode ierr;
108   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
109 
110   PetscFunctionBegin;
111   for (i=0; i<m; i++) {
112     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
113     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
114     if (idxm[i] >= rstart && idxm[i] < rend) {
115       row = idxm[i] - rstart;
116       for (j=0; j<n; j++) {
117         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
118         if (idxn[j] >= mat->N) {
119           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
120         }
121         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
122       }
123     } else {
124       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
125     }
126   }
127   PetscFunctionReturn(0);
128 }
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatGetArray_MPIDense"
132 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
133 {
134   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
135   PetscErrorCode ierr;
136 
137   PetscFunctionBegin;
138   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
144 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
145 {
146   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
147   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
148   PetscErrorCode ierr;
149   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
150   PetscScalar  *av,*bv,*v = lmat->v;
151   Mat          newmat;
152 
153   PetscFunctionBegin;
154   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
155   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
156   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
157   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
158 
159   /* No parallel redistribution currently supported! Should really check each index set
160      to confirm that it is OK.  Currently only submatrices with the same partitioning
161      as the original matrix are supported! */
162 
163   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
164   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
165 
166   /* Check submatrix call */
167   if (scall == MAT_REUSE_MATRIX) {
168     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
169     /* Really need to test the row and column sizes! */
170     newmat = *B;
171   } else {
172     /* Create and fill new matrix */
173     ierr = MatCreate(A->comm,nrows,cs,PETSC_DECIDE,ncols,&newmat);CHKERRQ(ierr);
174     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
175     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
176   }
177 
178   /* Now extract the data pointers and do the copy, column at a time */
179   newmatd = (Mat_MPIDense*)newmat->data;
180   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
181 
182   for (i=0; i<ncols; i++) {
183     av = v + nlrows*icol[i];
184     for (j=0; j<nrows; j++) {
185       *bv++ = av[irow[j] - rstart];
186     }
187   }
188 
189   /* Assemble the matrices so that the correct flags are set */
190   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
191   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192 
193   /* Free work space */
194   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
196   *B = newmat;
197   PetscFunctionReturn(0);
198 }
199 
200 #undef __FUNCT__
201 #define __FUNCT__ "MatRestoreArray_MPIDense"
202 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
203 {
204   PetscFunctionBegin;
205   PetscFunctionReturn(0);
206 }
207 
208 #undef __FUNCT__
209 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
210 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
211 {
212   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
213   MPI_Comm     comm = mat->comm;
214   PetscErrorCode ierr;
215   PetscInt          nstash,reallocs;
216   InsertMode   addv;
217 
218   PetscFunctionBegin;
219   /* make sure all processors are using the same insert mode (either INSERT_VALUES or ADD_VALUES) */
220   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
221   if (addv == (ADD_VALUES|INSERT_VALUES)) {
222     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
223   }
224   mat->insertmode = addv; /* in case this processor had no cache */
225 
226   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
227   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
228   ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 #undef __FUNCT__
233 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
234 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
235 {
236   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
237   PetscErrorCode  ierr;
238   PetscInt        i,*row,*col,flg,j,rstart,ncols;
239   PetscMPIInt     n;
240   PetscScalar     *val;
241   InsertMode      addv=mat->insertmode;
242 
243   PetscFunctionBegin;
244   /*  wait on receives */
245   while (1) {
246     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
247     if (!flg) break;
248 
249     for (i=0; i<n;) {
250       /* Now identify the consecutive vals belonging to the same row */
251       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
252       if (j < n) ncols = j-i;
253       else       ncols = n-i;
254       /* Now assemble all these values with a single function call */
255       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
256       i = j;
257     }
258   }
259   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
260 
261   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
262   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
263 
264   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
265     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
266   }
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatZeroEntries_MPIDense"
272 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
273 {
274   PetscErrorCode ierr;
275   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
276 
277   PetscFunctionBegin;
278   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 /* the code does not do the diagonal entries correctly unless the
283    matrix is square and the column and row ownerships are identical.
284    This is a BUG. The only way to fix it seems to be to access
285    mdn->A and mdn->B directly and not through the MatZeroRows()
286    routine.
287 */
288 #undef __FUNCT__
289 #define __FUNCT__ "MatZeroRows_MPIDense"
290 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
291 {
292   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
293   PetscErrorCode ierr;
294   PetscInt       i,N,*rows,*owners = l->rowners;
295   PetscInt       *nprocs,j,idx,nsends;
296   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
297   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
298   PetscInt       *lens,*lrows,*values;
299   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
300   MPI_Comm       comm = A->comm;
301   MPI_Request    *send_waits,*recv_waits;
302   MPI_Status     recv_status,*send_status;
303   IS             istmp;
304   PetscTruth     found;
305 
306   PetscFunctionBegin;
307   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
308   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
309 
310   /*  first count number of contributors to each processor */
311   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
312   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
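  /* nprocs[2*j] counts the rows this process will send to processor j;
     nprocs[2*j+1] is a 0/1 flag marking that at least one row goes to j
     (summed below to obtain the number of sends) */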
313   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
314   for (i=0; i<N; i++) {
315     idx = rows[i];
316     found = PETSC_FALSE;
317     for (j=0; j<size; j++) {
318       if (idx >= owners[j] && idx < owners[j+1]) {
319         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
320       }
321     }
322     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
323   }
324   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
325 
326   /* inform other processors of number of messages and max length*/
327   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
328 
329   /* post receives:   */
330   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
331   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
332   for (i=0; i<nrecvs; i++) {
333     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
334   }
335 
336   /* do sends:
337       1) starts[i] gives the starting index in svalues of the entries going to
338          the ith processor
339   */
340   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
341   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
342   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
343   starts[0]  = 0;
344   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
345   for (i=0; i<N; i++) {
346     svalues[starts[owner[i]]++] = rows[i];
347   }
348   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
349 
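  /* rebuild starts[] (it was advanced while packing svalues above) so that
     starts[i] again gives the first entry destined for processor i */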
350   starts[0] = 0;
351   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
352   count = 0;
353   for (i=0; i<size; i++) {
354     if (nprocs[2*i+1]) {
355       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
356     }
357   }
358   ierr = PetscFree(starts);CHKERRQ(ierr);
359 
360   base = owners[rank];
361 
362   /*  wait on receives */
363   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
364   source = lens + nrecvs;
365   count  = nrecvs; slen = 0;
366   while (count) {
367     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
368     /* unpack receives into our local space */
369     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
370     source[imdex]  = recv_status.MPI_SOURCE;
371     lens[imdex]    = n;
372     slen += n;
373     count--;
374   }
375   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
376 
377   /* unpack the received row indices, shifted to local numbering */
378   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
379   count = 0;
380   for (i=0; i<nrecvs; i++) {
381     values = rvalues + i*nmax;
382     for (j=0; j<lens[i]; j++) {
383       lrows[count++] = values[j] - base;
384     }
385   }
386   ierr = PetscFree(rvalues);CHKERRQ(ierr);
387   ierr = PetscFree(lens);CHKERRQ(ierr);
388   ierr = PetscFree(owner);CHKERRQ(ierr);
389   ierr = PetscFree(nprocs);CHKERRQ(ierr);
390 
391   /* actually zap the local rows */
392   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
393   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
394   ierr = PetscFree(lrows);CHKERRQ(ierr);
395   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
396   ierr = ISDestroy(istmp);CHKERRQ(ierr);
397 
398   /* wait on sends */
399   if (nsends) {
400     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
401     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
402     ierr = PetscFree(send_status);CHKERRQ(ierr);
403   }
404   ierr = PetscFree(send_waits);CHKERRQ(ierr);
405   ierr = PetscFree(svalues);CHKERRQ(ierr);
406 
407   PetscFunctionReturn(0);
408 }
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "MatMult_MPIDense"
412 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
413 {
414   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
419   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
420   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatMultAdd_MPIDense"
426 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
427 {
428   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
429   PetscErrorCode ierr;
430 
431   PetscFunctionBegin;
432   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
433   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
434   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "MatMultTranspose_MPIDense"
440 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
441 {
442   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
443   PetscErrorCode ierr;
444   PetscScalar  zero = 0.0;
445 
446   PetscFunctionBegin;
447   ierr = VecSet(&zero,yy);CHKERRQ(ierr);
448   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
449   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
450   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
456 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
457 {
458   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
463   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
464   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
465   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatGetDiagonal_MPIDense"
471 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
472 {
473   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
474   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
475   PetscErrorCode ierr;
476   PetscInt          len,i,n,m = A->m,radd;
477   PetscScalar  *x,zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(&zero,v);CHKERRQ(ierr);
481   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
482   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
483   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
484   len  = PetscMin(a->A->m,a->A->n);
485   radd = a->rstart*m;
486   for (i=0; i<len; i++) {
487     x[i] = aloc->v[radd + i*m + i];
488   }
489   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatDestroy_MPIDense"
495 PetscErrorCode MatDestroy_MPIDense(Mat mat)
496 {
497   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498   PetscErrorCode ierr;
499 
500   PetscFunctionBegin;
501 
502 #if defined(PETSC_USE_LOG)
503   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
504 #endif
505   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
506   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
507   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
508   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
509   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
510   if (mdn->factor) {
511     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
512     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
513     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
514     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
515   }
516   ierr = PetscFree(mdn);CHKERRQ(ierr);
517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatView_MPIDense_Binary"
524 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
525 {
526   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
527   PetscErrorCode ierr;
528 
529   PetscFunctionBegin;
530   if (mdn->size == 1) {
531     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
532   }
533   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
539 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
540 {
541   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
542   PetscErrorCode    ierr;
543   PetscMPIInt       size = mdn->size,rank = mdn->rank;
544   PetscViewerType   vtype;
545   PetscTruth        iascii,isdraw;
546   PetscViewer       sviewer;
547   PetscViewerFormat format;
548 
549   PetscFunctionBegin;
550   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
551   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
552   if (iascii) {
553     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
554     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
555     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
556       MatInfo info;
557       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
558       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
559                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
560       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
561       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
562       PetscFunctionReturn(0);
563     } else if (format == PETSC_VIEWER_ASCII_INFO) {
564       PetscFunctionReturn(0);
565     }
566   } else if (isdraw) {
567     PetscDraw       draw;
568     PetscTruth isnull;
569 
570     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
571     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
572     if (isnull) PetscFunctionReturn(0);
573   }
574 
575   if (size == 1) {
576     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
577   } else {
578     /* assemble the entire matrix onto first processor. */
579     Mat         A;
580     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
581     PetscInt    *cols;
582     PetscScalar *vals;
583 
584     if (!rank) {
585       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
586     } else {
587       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
588     }
589     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
590     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
591     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
592     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
593 
594     /* Copy the matrix ... This isn't the most efficient means,
595        but it's quick for now */
596     A->insertmode = INSERT_VALUES;
597     row = mdn->rstart; m = mdn->A->m;
598     for (i=0; i<m; i++) {
599       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
600       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
601       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
602       row++;
603     }
604 
605     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
606     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
607     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
608     if (!rank) {
609       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
610     }
611     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
612     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
613     ierr = MatDestroy(A);CHKERRQ(ierr);
614   }
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNCT__
619 #define __FUNCT__ "MatView_MPIDense"
620 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
621 {
622   PetscErrorCode ierr;
623   PetscTruth iascii,isbinary,isdraw,issocket;
624 
625   PetscFunctionBegin;
626 
627   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
628   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
629   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
630   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
631 
632   if (iascii || issocket || isdraw) {
633     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
634   } else if (isbinary) {
635     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
636   } else {
637     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
638   }
639   PetscFunctionReturn(0);
640 }
641 
642 #undef __FUNCT__
643 #define __FUNCT__ "MatGetInfo_MPIDense"
644 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
645 {
646   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
647   Mat          mdn = mat->A;
648   PetscErrorCode ierr;
649   PetscReal    isend[5],irecv[5];
650 
651   PetscFunctionBegin;
652   info->rows_global    = (double)A->M;
653   info->columns_global = (double)A->N;
654   info->rows_local     = (double)A->m;
655   info->columns_local  = (double)A->N;
656   info->block_size     = 1.0;
657   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
658   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
659   isend[3] = info->memory;  isend[4] = info->mallocs;
660   if (flag == MAT_LOCAL) {
661     info->nz_used      = isend[0];
662     info->nz_allocated = isend[1];
663     info->nz_unneeded  = isend[2];
664     info->memory       = isend[3];
665     info->mallocs      = isend[4];
666   } else if (flag == MAT_GLOBAL_MAX) {
667     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
668     info->nz_used      = irecv[0];
669     info->nz_allocated = irecv[1];
670     info->nz_unneeded  = irecv[2];
671     info->memory       = irecv[3];
672     info->mallocs      = irecv[4];
673   } else if (flag == MAT_GLOBAL_SUM) {
674     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
675     info->nz_used      = irecv[0];
676     info->nz_allocated = irecv[1];
677     info->nz_unneeded  = irecv[2];
678     info->memory       = irecv[3];
679     info->mallocs      = irecv[4];
680   }
681   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
682   info->fill_ratio_needed = 0;
683   info->factor_mallocs    = 0;
684   PetscFunctionReturn(0);
685 }
686 
687 #undef __FUNCT__
688 #define __FUNCT__ "MatSetOption_MPIDense"
689 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
690 {
691   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
692   PetscErrorCode ierr;
693 
694   PetscFunctionBegin;
695   switch (op) {
696   case MAT_NO_NEW_NONZERO_LOCATIONS:
697   case MAT_YES_NEW_NONZERO_LOCATIONS:
698   case MAT_NEW_NONZERO_LOCATION_ERR:
699   case MAT_NEW_NONZERO_ALLOCATION_ERR:
700   case MAT_COLUMNS_SORTED:
701   case MAT_COLUMNS_UNSORTED:
702     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
703     break;
704   case MAT_ROW_ORIENTED:
705     a->roworiented = PETSC_TRUE;
706     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
707     break;
708   case MAT_ROWS_SORTED:
709   case MAT_ROWS_UNSORTED:
710   case MAT_YES_NEW_DIAGONALS:
711   case MAT_USE_HASH_TABLE:
712     ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
713     break;
714   case MAT_COLUMN_ORIENTED:
715     a->roworiented = PETSC_FALSE;
716     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
717     break;
718   case MAT_IGNORE_OFF_PROC_ENTRIES:
719     a->donotstash = PETSC_TRUE;
720     break;
721   case MAT_NO_NEW_DIAGONALS:
722     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
723   case MAT_SYMMETRIC:
724   case MAT_STRUCTURALLY_SYMMETRIC:
725   case MAT_NOT_SYMMETRIC:
726   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
727   case MAT_HERMITIAN:
728   case MAT_NOT_HERMITIAN:
729   case MAT_SYMMETRY_ETERNAL:
730   case MAT_NOT_SYMMETRY_ETERNAL:
731     break;
732   default:
733     SETERRQ(PETSC_ERR_SUP,"unknown option");
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatDiagonalScale_MPIDense"
741 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
742 {
743   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
744   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
745   PetscScalar  *l,*r,x,*v;
746   PetscErrorCode ierr;
747   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
748 
749   PetscFunctionBegin;
750   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
751   if (ll) {
752     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
753     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
754     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
755     for (i=0; i<m; i++) {
756       x = l[i];
757       v = mat->v + i;
758       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
759     }
760     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
761     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
762   }
763   if (rr) {
764     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
765     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
766     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
767     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
768     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
769     for (i=0; i<n; i++) {
770       x = r[i];
771       v = mat->v + i*m;
772       for (j=0; j<m; j++) { (*v++) *= x;}
773     }
774     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
775     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 #undef __FUNCT__
781 #define __FUNCT__ "MatNorm_MPIDense"
782 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
783 {
784   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
785   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
786   PetscErrorCode ierr;
787   PetscInt          i,j;
788   PetscReal    sum = 0.0;
789   PetscScalar  *v = mat->v;
790 
791   PetscFunctionBegin;
792   if (mdn->size == 1) {
793     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
794   } else {
795     if (type == NORM_FROBENIUS) {
796       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
797 #if defined(PETSC_USE_COMPLEX)
798         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
799 #else
800         sum += (*v)*(*v); v++;
801 #endif
802       }
803       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
804       *nrm = sqrt(*nrm);
805       ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
806     } else if (type == NORM_1) {
807       PetscReal *tmp,*tmp2;
808       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
809       tmp2 = tmp + A->N;
810       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
811       *nrm = 0.0;
812       v = mat->v;
813       for (j=0; j<mdn->A->n; j++) {
814         for (i=0; i<mdn->A->m; i++) {
815           tmp[j] += PetscAbsScalar(*v);  v++;
816         }
817       }
818       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
819       for (j=0; j<A->N; j++) {
820         if (tmp2[j] > *nrm) *nrm = tmp2[j];
821       }
822       ierr = PetscFree(tmp);CHKERRQ(ierr);
823       ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
824     } else if (type == NORM_INFINITY) { /* max row norm */
825       PetscReal ntemp;
826       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
827       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
828     } else {
829       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
830     }
831   }
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatTranspose_MPIDense"
837 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
838 {
839   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
840   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
841   Mat          B;
842   PetscInt          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
843   PetscErrorCode ierr;
844   PetscInt          j,i;
845   PetscScalar  *v;
846 
847   PetscFunctionBegin;
848   if (!matout && M != N) {
849     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
850   }
851   ierr = MatCreate(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,&B);CHKERRQ(ierr);
852   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
853   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
854 
855   m = a->A->m; n = a->A->n; v = Aloc->v;
856   ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
857   for (j=0; j<n; j++) {
858     for (i=0; i<m; i++) rwork[i] = rstart + i;
859     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
860     v   += m;
861   }
862   ierr = PetscFree(rwork);CHKERRQ(ierr);
863   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
864   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
865   if (matout) {
866     *matout = B;
867   } else {
868     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
869   }
870   PetscFunctionReturn(0);
871 }
872 
873 #include "petscblaslapack.h"
874 #undef __FUNCT__
875 #define __FUNCT__ "MatScale_MPIDense"
876 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
877 {
878   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
879   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
880   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
881   PetscErrorCode ierr;
882 
883   PetscFunctionBegin;
884   BLASscal_(&nz,(PetscScalar*)alpha,a->v,&one);
885   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
893 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
894 {
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 /* -------------------------------------------------------------------*/
903 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
904        MatGetRow_MPIDense,
905        MatRestoreRow_MPIDense,
906        MatMult_MPIDense,
907 /* 4*/ MatMultAdd_MPIDense,
908        MatMultTranspose_MPIDense,
909        MatMultTransposeAdd_MPIDense,
910        0,
911        0,
912        0,
913 /*10*/ 0,
914        0,
915        0,
916        0,
917        MatTranspose_MPIDense,
918 /*15*/ MatGetInfo_MPIDense,
919        0,
920        MatGetDiagonal_MPIDense,
921        MatDiagonalScale_MPIDense,
922        MatNorm_MPIDense,
923 /*20*/ MatAssemblyBegin_MPIDense,
924        MatAssemblyEnd_MPIDense,
925        0,
926        MatSetOption_MPIDense,
927        MatZeroEntries_MPIDense,
928 /*25*/ MatZeroRows_MPIDense,
929        0,
930        0,
931        0,
932        0,
933 /*30*/ MatSetUpPreallocation_MPIDense,
934        0,
935        0,
936        MatGetArray_MPIDense,
937        MatRestoreArray_MPIDense,
938 /*35*/ MatDuplicate_MPIDense,
939        0,
940        0,
941        0,
942        0,
943 /*40*/ 0,
944        MatGetSubMatrices_MPIDense,
945        0,
946        MatGetValues_MPIDense,
947        0,
948 /*45*/ 0,
949        MatScale_MPIDense,
950        0,
951        0,
952        0,
953 /*50*/ 0,
954        0,
955        0,
956        0,
957        0,
958 /*55*/ 0,
959        0,
960        0,
961        0,
962        0,
963 /*60*/ MatGetSubMatrix_MPIDense,
964        MatDestroy_MPIDense,
965        MatView_MPIDense,
966        MatGetPetscMaps_Petsc,
967        0,
968 /*65*/ 0,
969        0,
970        0,
971        0,
972        0,
973 /*70*/ 0,
974        0,
975        0,
976        0,
977        0,
978 /*75*/ 0,
979        0,
980        0,
981        0,
982        0,
983 /*80*/ 0,
984        0,
985        0,
986        0,
987 /*84*/ MatLoad_MPIDense,
988        0,
989        0,
990        0,
991        0,
992        0,
993 /*90*/ 0,
994        0,
995        0,
996        0,
997        0,
998 /*95*/ 0,
999        0,
1000        0,
1001        0};
1002 
1003 EXTERN_C_BEGIN
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1006 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1007 {
1008   Mat_MPIDense *a;
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   mat->preallocated = PETSC_TRUE;
1013   /* Note:  For now, when data is specified, we assume that the user has correctly
1014    allocated the local dense storage space.  We should add error checking. */
1015 
1016   a    = (Mat_MPIDense*)mat->data;
1017   ierr = MatCreate(PETSC_COMM_SELF,mat->m,mat->N,mat->m,mat->N,&a->A);CHKERRQ(ierr);
1018   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1019   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1020   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 EXTERN_C_END
1024 
1025 /*MC
1026    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1027 
1028    Options Database Keys:
1029 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1030 
1031   Level: beginner
1032 
1033 .seealso: MatCreateMPIDense
1034 M*/
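
/*
   A minimal usage sketch for MATMPIDENSE (assumptions: comm is a parallel MPI
   communicator and M,N are the desired global sizes; PETSc manages the storage).
   It mirrors the MatCreate()/MatSetType()/MatMPIDenseSetPreallocation() calls
   used elsewhere in this file:

      Mat A;
      ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
      ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/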
1035 
1036 EXTERN_C_BEGIN
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatCreate_MPIDense"
1039 PetscErrorCode MatCreate_MPIDense(Mat mat)
1040 {
1041   Mat_MPIDense *a;
1042   PetscErrorCode ierr;
1043   PetscInt          i;
1044 
1045   PetscFunctionBegin;
1046   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1047   mat->data         = (void*)a;
1048   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1049   mat->factor       = 0;
1050   mat->mapping      = 0;
1051 
1052   a->factor       = 0;
1053   mat->insertmode = NOT_SET_VALUES;
1054   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1055   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1056 
1057   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1058   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1059   a->nvec = mat->n;
1060 
1061   /* the information in the maps duplicates the information computed below, eventually
1062      we should remove the duplicate information that is not contained in the maps */
1063   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1064   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1065 
1066   /* build local table of row and column ownerships */
1067   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1068   a->cowners = a->rowners + a->size + 1;
1069   ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1070   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1071   a->rowners[0] = 0;
1072   for (i=2; i<=a->size; i++) {
1073     a->rowners[i] += a->rowners[i-1];
1074   }
1075   a->rstart = a->rowners[a->rank];
1076   a->rend   = a->rowners[a->rank+1];
1077   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1078   a->cowners[0] = 0;
1079   for (i=2; i<=a->size; i++) {
1080     a->cowners[i] += a->cowners[i-1];
1081   }
1082 
1083   /* build cache (stash) for entries destined for other processors */
1084   a->donotstash = PETSC_FALSE;
1085   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1086 
1087   /* stuff used for matrix vector multiply */
1088   a->lvec        = 0;
1089   a->Mvctx       = 0;
1090   a->roworiented = PETSC_TRUE;
1091 
1092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1093                                      "MatGetDiagonalBlock_MPIDense",
1094                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1096                                      "MatMPIDenseSetPreallocation_MPIDense",
1097                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 EXTERN_C_END
1101 
1102 /*MC
1103    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1104 
1105    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1106    and MATMPIDENSE otherwise.
1107 
1108    Options Database Keys:
1109 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1110 
1111   Level: beginner
1112 
1113 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1114 M*/
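
/*
   A small sketch of selecting MATDENSE (assumption: A was already created with
   MatCreate() on some communicator).  MatCreate_Dense() below dispatches on the
   communicator size, yielding MATSEQDENSE on one process and MATMPIDENSE otherwise:

      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
*/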
1115 
1116 EXTERN_C_BEGIN
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatCreate_Dense"
1119 PetscErrorCode MatCreate_Dense(Mat A)
1120 {
1121   PetscErrorCode ierr;
1122   PetscMPIInt    size;
1123 
1124   PetscFunctionBegin;
1125   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1127   if (size == 1) {
1128     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1129   } else {
1130     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1131   }
1132   PetscFunctionReturn(0);
1133 }
1134 EXTERN_C_END
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1138 /*@C
1139    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1140 
1141    Not collective
1142 
1143    Input Parameters:
1144 .  A - the matrix
1145 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1146    to control all matrix memory allocation.
1147 
1148    Notes:
1149    The dense format is fully compatible with standard Fortran 77
1150    storage by columns.
1151 
1152    The data input variable is intended primarily for Fortran programmers
1153    who wish to allocate their own matrix memory space.  Most users should
1154    set data=PETSC_NULL.
1155 
1156    Level: intermediate
1157 
1158 .keywords: matrix,dense, parallel
1159 
1160 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1161 @*/
1162 PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1163 {
1164   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1165 
1166   PetscFunctionBegin;
1167   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1168   if (f) {
1169     ierr = (*f)(mat,data);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
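
/*
   A hedged usage sketch for supplying user-managed storage (the names comm, m, M, N
   and mydata are illustrative).  The array is assumed to hold the local m x N block
   in column-major (Fortran) order, which is what MatMPIDenseSetPreallocation_MPIDense()
   above hands to MatSeqDenseSetPreallocation():

      PetscScalar *mydata;    user-allocated array of length m*N
      Mat         A;
      ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/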
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatCreateMPIDense"
1176 /*@C
1177    MatCreateMPIDense - Creates a parallel matrix in dense format.
1178 
1179    Collective on MPI_Comm
1180 
1181    Input Parameters:
1182 +  comm - MPI communicator
1183 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1184 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1185 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1186 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1187 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1188    to control all matrix memory allocation.
1189 
1190    Output Parameter:
1191 .  A - the matrix
1192 
1193    Notes:
1194    The dense format is fully compatible with standard Fortran 77
1195    storage by columns.
1196 
1197    The data input variable is intended primarily for Fortran programmers
1198    who wish to allocate their own matrix memory space.  Most users should
1199    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1200 
1201    The user MUST specify either the local or global matrix dimensions
1202    (possibly both).
1203 
1204    Level: intermediate
1205 
1206 .keywords: matrix,dense, parallel
1207 
1208 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1209 @*/
1210 PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1211 {
1212   PetscErrorCode ierr;
1213   PetscMPIInt    size;
1214 
1215   PetscFunctionBegin;
1216   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1217   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1218   if (size > 1) {
1219     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1220     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1221   } else {
1222     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1223     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1224   }
1225   PetscFunctionReturn(0);
1226 }
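
/*
   A minimal sketch of the convenience routine above (PETSc controls the storage;
   M and N are illustrative global sizes):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/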
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatDuplicate_MPIDense"
1230 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1231 {
1232   Mat          mat;
1233   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1234   PetscErrorCode ierr;
1235 
1236   PetscFunctionBegin;
1237   *newmat       = 0;
1238   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);CHKERRQ(ierr);
1239   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1240   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1241   mat->data         = (void*)a;
1242   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1243   mat->factor       = A->factor;
1244   mat->assembled    = PETSC_TRUE;
1245   mat->preallocated = PETSC_TRUE;
1246 
1247   a->rstart       = oldmat->rstart;
1248   a->rend         = oldmat->rend;
1249   a->size         = oldmat->size;
1250   a->rank         = oldmat->rank;
1251   mat->insertmode = NOT_SET_VALUES;
1252   a->nvec         = oldmat->nvec;
1253   a->donotstash   = oldmat->donotstash;
1254   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1255   ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1256   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1257   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1258 
1259   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1260   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1261   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1262   *newmat = mat;
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #include "petscsys.h"
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1270 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,const MatType type,Mat *newmat)
1271 {
1272   PetscErrorCode ierr;
1273   PetscMPIInt    rank,size;
1274   PetscInt            *rowners,i,m,nz,j;
1275   PetscScalar    *array,*vals,*vals_ptr;
1276   MPI_Status     status;
1277 
1278   PetscFunctionBegin;
1279   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1280   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1281 
1282   /* determine ownership of all rows */
1283   m          = M/size + ((M % size) > rank);
1284   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1285   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1286   rowners[0] = 0;
1287   for (i=2; i<=size; i++) {
1288     rowners[i] += rowners[i-1];
1289   }
1290 
1291   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1292   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1293   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1294   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1295 
1296   if (!rank) {
1297     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1298 
1299     /* read in my part of the matrix numerical values  */
1300     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1301 
1302     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1303     vals_ptr = vals;
1304     for (i=0; i<m; i++) {
1305       for (j=0; j<N; j++) {
1306         array[i + j*m] = *vals_ptr++;
1307       }
1308     }
1309 
1310     /* read in the other processors' parts and ship them out */
1311     for (i=1; i<size; i++) {
1312       nz   = (rowners[i+1] - rowners[i])*N;
1313       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1314       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1315     }
1316   } else {
1317     /* receive numeric values */
1318     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1319 
1320     /* receive message of values*/
1321     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1322 
1323     /* insert into matrix-by row (this is why cannot directly read into array */
1324     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1325     for (i=0; i<m; i++) {
1326       for (j=0; j<N; j++) {
1327         array[i + j*m] = *vals_ptr++;
1328       }
1329     }
1330   }
1331   ierr = PetscFree(rowners);CHKERRQ(ierr);
1332   ierr = PetscFree(vals);CHKERRQ(ierr);
1333   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1334   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatLoad_MPIDense"
1340 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1341 {
1342   Mat            A;
1343   PetscScalar    *vals,*svals;
1344   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1345   MPI_Status     status;
1346   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1347   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1348   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1349   PetscInt       i,nz,j,rstart,rend;
1350   int            fd;
1351   PetscErrorCode ierr;
1352 
1353   PetscFunctionBegin;
1354   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1355   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1356   if (!rank) {
1357     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1358     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1359     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
1360   }
1361 
1362   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1363   M = header[1]; N = header[2]; nz = header[3];
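  /* binary header layout: [cookie, M, N, nz]; nz == MATRIX_BINARY_FORMAT_DENSE flags
     that the values are stored on disk as a dense matrix, handled separately below */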
1364 
1365   /*
1366        Handle case where matrix is stored on disk as a dense matrix
1367   */
1368   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1369     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1370     PetscFunctionReturn(0);
1371   }
1372 
1373   /* determine ownership of all rows */
1374   m          = M/size + ((M % size) > rank);
1375   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1376   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1377   rowners[0] = 0;
1378   for (i=2; i<=size; i++) {
1379     rowners[i] += rowners[i-1];
1380   }
1381   rstart = rowners[rank];
1382   rend   = rowners[rank+1];
1383 
1384   /* distribute row lengths to all processors */
1385   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1386   offlens = ourlens + (rend-rstart);
1387   if (!rank) {
1388     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1389     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1390     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1391     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1392     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1393     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1394   } else {
1395     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1396   }
1397 
1398   if (!rank) {
1399     /* calculate the number of nonzeros on each processor */
1400     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1401     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1402     for (i=0; i<size; i++) {
1403       for (j=rowners[i]; j< rowners[i+1]; j++) {
1404         procsnz[i] += rowlengths[j];
1405       }
1406     }
1407     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1408 
1409     /* determine max buffer needed and allocate it */
1410     maxnz = 0;
1411     for (i=0; i<size; i++) {
1412       maxnz = PetscMax(maxnz,procsnz[i]);
1413     }
1414     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1415 
1416     /* read in my part of the matrix column indices  */
1417     nz = procsnz[0];
1418     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1419     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1420 
1421     /* read in everyone else's parts and ship them off */
1422     for (i=1; i<size; i++) {
1423       nz   = procsnz[i];
1424       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1425       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1426     }
1427     ierr = PetscFree(cols);CHKERRQ(ierr);
1428   } else {
1429     /* determine buffer space needed for message */
1430     nz = 0;
1431     for (i=0; i<m; i++) {
1432       nz += ourlens[i];
1433     }
1434     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1435 
1436     /* receive message of column indices*/
1437     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1438     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1439     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1440   }
1441 
1442   /* loop over local rows, determining the number of off-diagonal entries */
1443   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1444   jj = 0;
1445   for (i=0; i<m; i++) {
1446     for (j=0; j<ourlens[i]; j++) {
1447       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1448       jj++;
1449     }
1450   }
1451 
1452   /* create our matrix */
1453   for (i=0; i<m; i++) {
1454     ourlens[i] -= offlens[i];
1455   }
1456   ierr = MatCreate(comm,m,PETSC_DECIDE,M,N,newmat);CHKERRQ(ierr);
1457   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1458   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1459   A = *newmat;
1460   for (i=0; i<m; i++) {
1461     ourlens[i] += offlens[i];
1462   }
1463 
1464   if (!rank) {
1465     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1466 
1467     /* read in my part of the matrix numerical values  */
1468     nz = procsnz[0];
1469     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1470 
1471     /* insert into matrix */
1472     jj      = rstart;
1473     smycols = mycols;
1474     svals   = vals;
1475     for (i=0; i<m; i++) {
1476       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1477       smycols += ourlens[i];
1478       svals   += ourlens[i];
1479       jj++;
1480     }
1481 
1482     /* read in the other processors' parts and ship them out */
1483     for (i=1; i<size; i++) {
1484       nz   = procsnz[i];
1485       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1486       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1487     }
1488     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1489   } else {
1490     /* receive numeric values */
1491     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1492 
1493     /* receive message of values*/
1494     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1495     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1496     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1497 
1498     /* insert into matrix */
1499     jj      = rstart;
1500     smycols = mycols;
1501     svals   = vals;
1502     for (i=0; i<m; i++) {
1503       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1504       smycols += ourlens[i];
1505       svals   += ourlens[i];
1506       jj++;
1507     }
1508   }
1509   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1510   ierr = PetscFree(vals);CHKERRQ(ierr);
1511   ierr = PetscFree(mycols);CHKERRQ(ierr);
1512   ierr = PetscFree(rowners);CHKERRQ(ierr);
1513 
1514   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1515   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 
1520 
1521 
1522 
1523