/* petsc/src/mat/impls/dense/mpi/mpidense.c (revision 95fbd943d7c27f52d1b744f92ae57b2fc0bd05b9) */
#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that stores the local part of the operator; for a MATSEQDENSE
              matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) {  /* clumsy check: a MATDENSE matrix on one process is really a MATSEQDENSE matrix */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}
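/*
   A minimal usage sketch (assumes A is an already assembled dense matrix,
   sequential or parallel).  The returned Aloc is always of type MATSEQDENSE
   and its array is stored column-major:

      Mat         Aloc;
      PetscScalar *v;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
      ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
*/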

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->m,rstart = mdn->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
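/*
   Note on usage: entries destined for rows owned by another process are cached
   in mat->stash by the routine above and are not communicated until assembly.
   A minimal sketch (any process may set any global row/column; grow, gcol and
   val are illustrative names):

      ierr = MatSetValues(A,1,&grow,1,&gcol,&val,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/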

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK; currently the submatrix must have the same row partitioning as
     the original matrix. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
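/*
   The routine above follows the usual PETSc rendezvous pattern: each process
   bins the global rows it was given by owner, PetscMaxSum() tells every
   process how many index messages to expect and their maximum length, the
   indices travel via MPI_Irecv/MPI_Isend, and each process finally zeros the
   local rows it received.  A minimal caller sketch (rows holds global indices,
   not necessarily locally owned; 1.0 is placed on the diagonal):

      PetscInt rows[2] = {0,5};
      ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/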

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
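/*
   Each process stores its m local rows against all N columns, so the routine
   above computes y = A*x by gathering the entire distributed x into the local
   work vector lvec (through the Mvctx scatter) and then performing a single
   sequential dense multiply, y_local = A_local * x_gathered.  The transpose
   kernels below invert this pattern: multiply locally first, then scatter-add
   the partial results back into the distributed result vector.
*/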

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->m,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
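/*
   Indexing note for the routine above: the local block is stored column-major
   with m = A->m rows, so the global diagonal entry (rstart+i, rstart+i) lies
   in local column rstart+i, i.e. at offset (rstart+i)*m + i = radd + i*m + i,
   which is exactly the expression used in the loop.
*/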

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   VecDestroy(mdn->lvec);
  if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
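/*
   The parallel branches above reduce local contributions over the communicator:
     Frobenius: ||A||_F = sqrt(sum_ij |a_ij|^2), local sums of squares combined
                with MPI_SUM, then one square root;
     1-norm:    ||A||_1 = max_j sum_i |a_ij|, per-column absolute sums combined
                with MPI_SUM, then the maximum taken over columns;
     infinity:  ||A||_inf = max_i sum_j |a_ij|, local row norms combined with
                MPI_MAX (rows are never split across processes, so no summing
                is required).
*/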

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i; /* global rows of A become columns of B */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
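/*
   The scaling above treats the whole local block (inA->m rows by inA->N
   columns, stored contiguously in column-major order) as one vector of
   length m*N and hands it to the BLAS-1 routine xSCAL in a single call.
*/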

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/
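/*
   A minimal sketch of selecting this type from the options database (run the
   program with -mat_type mpidense; M and N are illustrative global sizes):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/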

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-processor entries formed by MatSetValues() */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
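/*
   A minimal sketch of supplying user-allocated storage (mlocal and N are
   illustrative names for the local row count and global column count; the
   array must be column-major, mlocal rows by N columns):

      PetscScalar *data;
      ierr = PetscMalloc(mlocal*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);

   PETSc does not take ownership of a user-provided array; free it only after
   the matrix has been destroyed.
*/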

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
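/*
   A minimal creation sketch (M and N are illustrative global dimensions;
   PETSc chooses the row distribution and manages the storage):

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/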

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;
  ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the column-major array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in other processors' rows and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix by row (this is why we cannot read directly into the column-major array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
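/*
   A minimal loading sketch (assumes a file previously written with MatView()
   on a binary viewer; "matrix.dat" is a hypothetical file name):

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/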

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}