xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f69a0ea3504314d029ee423e0de2950ece2dab86)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatGetRow_MPIDense"
11 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
12 {
13   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
14   PetscErrorCode ierr;
15   PetscInt          lrow,rstart = mat->rstart,rend = mat->rend;
16 
17   PetscFunctionBegin;
18   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
19   lrow = row - rstart;
20   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
21   PetscFunctionReturn(0);
22 }
23 
24 #undef __FUNCT__
25 #define __FUNCT__ "MatRestoreRow_MPIDense"
26 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
27 {
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
32   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
33   PetscFunctionReturn(0);
34 }
35 
36 EXTERN_C_BEGIN
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
39 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
40 {
41   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt          m = A->m,rstart = mdn->rstart;
44   PetscScalar  *array;
45   MPI_Comm     comm;
46 
47   PetscFunctionBegin;
48   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
49 
50   /* The reuse aspect is not implemented efficiently */
51   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
52 
53   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
54   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
55   ierr = MatCreate(comm,B);CHKERRQ(ierr);
56   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
57   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
58   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
59   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
60   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
61   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
62 
63   *iscopy = PETSC_TRUE;
64   PetscFunctionReturn(0);
65 }
66 EXTERN_C_END
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatSetValues_MPIDense"
70 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
71 {
72   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
73   PetscErrorCode ierr;
74   PetscInt          i,j,rstart = A->rstart,rend = A->rend,row;
75   PetscTruth   roworiented = A->roworiented;
76 
77   PetscFunctionBegin;
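  /* Rows owned by this process go directly into the local sequential block (A->A);
     rows owned elsewhere are stashed and communicated during assembly, unless
     MAT_IGNORE_OFF_PROC_ENTRIES (A->donotstash) was set. */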
78   for (i=0; i<m; i++) {
79     if (idxm[i] < 0) continue;
80     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
81     if (idxm[i] >= rstart && idxm[i] < rend) {
82       row = idxm[i] - rstart;
83       if (roworiented) {
84         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
85       } else {
86         for (j=0; j<n; j++) {
87           if (idxn[j] < 0) continue;
88           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
89           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
90         }
91       }
92     } else {
93       if (!A->donotstash) {
94         if (roworiented) {
95           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
96         } else {
97           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
98         }
99       }
100     }
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetValues_MPIDense"
107 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
108 {
109   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt          i,j,rstart = mdn->rstart,rend = mdn->rend,row;
112 
113   PetscFunctionBegin;
114   for (i=0; i<m; i++) {
115     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
116     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
117     if (idxm[i] >= rstart && idxm[i] < rend) {
118       row = idxm[i] - rstart;
119       for (j=0; j<n; j++) {
120         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
121         if (idxn[j] >= mat->N) {
122           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
123         }
124         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
125       }
126     } else {
127       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
128     }
129   }
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatGetArray_MPIDense"
135 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
136 {
137   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
147 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
148 {
149   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
150   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
151   PetscErrorCode ierr;
152   PetscInt          i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
153   PetscScalar  *av,*bv,*v = lmat->v;
154   Mat          newmat;
155 
156   PetscFunctionBegin;
157   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
158   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
159   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
160   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
161 
162   /* No parallel redistribution is currently supported! We should really check each index
163      set to confirm that it is valid. Currently only submatrices with the same row
164      partitioning as the original matrix are supported! */
165 
166   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
167   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
168 
169   /* Check submatrix call */
170   if (scall == MAT_REUSE_MATRIX) {
171     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
172     /* Really need to test rows and column sizes! */
173     newmat = *B;
174   } else {
175     /* Create and fill new matrix */
176     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
177     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
178     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
179     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
180   }
181 
182   /* Now extract the data pointers and do the copy, column at a time */
183   newmatd = (Mat_MPIDense*)newmat->data;
184   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
185 
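  /* lmat->v is stored column-major with nlrows local rows, so v + nlrows*icol[i] points to
     the start of global column icol[i]; within it, local row irow[j]-rstart is copied out. */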
186   for (i=0; i<ncols; i++) {
187     av = v + nlrows*icol[i];
188     for (j=0; j<nrows; j++) {
189       *bv++ = av[irow[j] - rstart];
190     }
191   }
192 
193   /* Assemble the matrices so that the correct flags are set */
194   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
195   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196 
197   /* Free work space */
198   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
199   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
200   *B = newmat;
201   PetscFunctionReturn(0);
202 }
203 
204 #undef __FUNCT__
205 #define __FUNCT__ "MatRestoreArray_MPIDense"
206 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
207 {
208   PetscFunctionBegin;
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
214 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
215 {
216   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
217   MPI_Comm     comm = mat->comm;
218   PetscErrorCode ierr;
219   PetscInt          nstash,reallocs;
220   InsertMode   addv;
221 
222   PetscFunctionBegin;
223   /* make sure all processors are either in INSERTMODE or ADDMODE */
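  /* The bitwise-OR reduction is a cheap consistency check: if some processes used
     INSERT_VALUES and others ADD_VALUES, the result equals (ADD_VALUES|INSERT_VALUES),
     which is caught below; processes that set no values (NOT_SET_VALUES) contribute
     zero bits and do not perturb the result. */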
224   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
225   if (addv == (ADD_VALUES|INSERT_VALUES)) {
226     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
227   }
228   mat->insertmode = addv; /* in case this processor had no cache */
229 
230   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
231   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
232   ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
238 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
239 {
240   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
241   PetscErrorCode  ierr;
242   PetscInt        i,*row,*col,flg,j,rstart,ncols;
243   PetscMPIInt     n;
244   PetscScalar     *val;
245   InsertMode      addv=mat->insertmode;
246 
247   PetscFunctionBegin;
248   /*  wait on receives */
249   while (1) {
250     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
251     if (!flg) break;
252 
253     for (i=0; i<n;) {
254       /* Now identify the consecutive vals belonging to the same row */
255       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
256       if (j < n) ncols = j-i;
257       else       ncols = n-i;
258       /* Now assemble all these values with a single function call */
259       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
260       i = j;
261     }
262   }
263   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
264 
265   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
266   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
267 
268   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
269     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatZeroEntries_MPIDense"
276 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
277 {
278   PetscErrorCode ierr;
279   Mat_MPIDense *l = (Mat_MPIDense*)A->data;
280 
281   PetscFunctionBegin;
282   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 /* the code does not do the diagonal entries correctly unless the
287    matrix is square and the column and row ownerships are identical.
288    This is a BUG. The only way to fix it seems to be to access
289    mdn->A and mdn->B directly and not through the MatZeroRows()
290    routine.
291 */
292 #undef __FUNCT__
293 #define __FUNCT__ "MatZeroRows_MPIDense"
294 PetscErrorCode MatZeroRows_MPIDense(Mat A,IS is,const PetscScalar *diag)
295 {
296   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
297   PetscErrorCode ierr;
298   PetscInt       i,N,*rows,*owners = l->rowners;
299   PetscInt       *nprocs,j,idx,nsends;
300   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
301   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
302   PetscInt       *lens,*lrows,*values;
303   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
304   MPI_Comm       comm = A->comm;
305   MPI_Request    *send_waits,*recv_waits;
306   MPI_Status     recv_status,*send_status;
307   IS             istmp;
308   PetscTruth     found;
309 
310   PetscFunctionBegin;
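  /* Overall flow: bin the requested global rows by owning process, exchange the row
     indices with point-to-point messages, convert the received rows to local numbering,
     and then zero them through MatZeroRows() on the local sequential block. */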
311   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
312   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
313 
314   /*  first count number of contributors to each processor */
315   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
316   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
317   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
318   for (i=0; i<N; i++) {
319     idx = rows[i];
320     found = PETSC_FALSE;
321     for (j=0; j<size; j++) {
322       if (idx >= owners[j] && idx < owners[j+1]) {
323         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
324       }
325     }
326     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
327   }
328   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
329 
330   /* inform other processors of number of messages and max length*/
331   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
332 
333   /* post receives:   */
334   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
335   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
336   for (i=0; i<nrecvs; i++) {
337     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
338   }
339 
340   /* do sends:
341       1) starts[i] gives the starting index in svalues for stuff going to
342          the ith processor
343   */
344   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
345   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
346   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
347   starts[0]  = 0;
348   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
349   for (i=0; i<N; i++) {
350     svalues[starts[owner[i]]++] = rows[i];
351   }
352   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
353 
354   starts[0] = 0;
355   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
356   count = 0;
357   for (i=0; i<size; i++) {
358     if (nprocs[2*i+1]) {
359       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
360     }
361   }
362   ierr = PetscFree(starts);CHKERRQ(ierr);
363 
364   base = owners[rank];
365 
366   /*  wait on receives */
367   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
368   source = lens + nrecvs;
369   count  = nrecvs; slen = 0;
370   while (count) {
371     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
372     /* unpack receives into our local space */
373     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
374     source[imdex]  = recv_status.MPI_SOURCE;
375     lens[imdex]    = n;
376     slen += n;
377     count--;
378   }
379   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
380 
381   /* move the data into the send scatter */
382   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
383   count = 0;
384   for (i=0; i<nrecvs; i++) {
385     values = rvalues + i*nmax;
386     for (j=0; j<lens[i]; j++) {
387       lrows[count++] = values[j] - base;
388     }
389   }
390   ierr = PetscFree(rvalues);CHKERRQ(ierr);
391   ierr = PetscFree(lens);CHKERRQ(ierr);
392   ierr = PetscFree(owner);CHKERRQ(ierr);
393   ierr = PetscFree(nprocs);CHKERRQ(ierr);
394 
395   /* actually zap the local rows */
396   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
397   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
398   ierr = PetscFree(lrows);CHKERRQ(ierr);
399   ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
400   ierr = ISDestroy(istmp);CHKERRQ(ierr);
401 
402   /* wait on sends */
403   if (nsends) {
404     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
405     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
406     ierr = PetscFree(send_status);CHKERRQ(ierr);
407   }
408   ierr = PetscFree(send_waits);CHKERRQ(ierr);
409   ierr = PetscFree(svalues);CHKERRQ(ierr);
410 
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatMult_MPIDense"
416 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
417 {
418   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
423   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
424   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatMultAdd_MPIDense"
430 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
431 {
432   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
433   PetscErrorCode ierr;
434 
435   PetscFunctionBegin;
436   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
437   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
438   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatMultTranspose_MPIDense"
444 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
445 {
446   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
447   PetscErrorCode ierr;
448   PetscScalar  zero = 0.0;
449 
450   PetscFunctionBegin;
451   ierr = VecSet(yy,zero);CHKERRQ(ierr);
452   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
453   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
454   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
460 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
467   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
468   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
469   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatGetDiagonal_MPIDense"
475 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
476 {
477   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
478   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
479   PetscErrorCode ierr;
480   PetscInt          len,i,n,m = A->m,radd;
481   PetscScalar  *x,zero = 0.0;
482 
483   PetscFunctionBegin;
484   ierr = VecSet(v,zero);CHKERRQ(ierr);
485   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
486   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
487   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
488   len  = PetscMin(a->A->m,a->A->n);
489   radd = a->rstart*m;
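  /* The local block is stored column-major with leading dimension m, so the diagonal
     entry of local row i sits at global column rstart+i, i.e. at offset
     (rstart+i)*m + i = radd + i*m + i in aloc->v. */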
490   for (i=0; i<len; i++) {
491     x[i] = aloc->v[radd + i*m + i];
492   }
493   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatDestroy_MPIDense"
499 PetscErrorCode MatDestroy_MPIDense(Mat mat)
500 {
501   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
502   PetscErrorCode ierr;
503 
504   PetscFunctionBegin;
505 
506 #if defined(PETSC_USE_LOG)
507   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
508 #endif
509   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
510   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
511   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
512   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
513   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
514   if (mdn->factor) {
515     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
516     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
517     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
518     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
519   }
520   ierr = PetscFree(mdn);CHKERRQ(ierr);
521   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatView_MPIDense_Binary"
528 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
529 {
530   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
531   PetscErrorCode ierr;
532 
533   PetscFunctionBegin;
534   if (mdn->size == 1) {
535     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
536   }
537   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
543 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
544 {
545   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
546   PetscErrorCode    ierr;
547   PetscMPIInt       size = mdn->size,rank = mdn->rank;
548   PetscViewerType   vtype;
549   PetscTruth        iascii,isdraw;
550   PetscViewer       sviewer;
551   PetscViewerFormat format;
552 
553   PetscFunctionBegin;
554   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
555   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
556   if (iascii) {
557     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
558     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
559     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
560       MatInfo info;
561       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
562       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
563                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
564       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
565       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
566       PetscFunctionReturn(0);
567     } else if (format == PETSC_VIEWER_ASCII_INFO) {
568       PetscFunctionReturn(0);
569     }
570   } else if (isdraw) {
571     PetscDraw       draw;
572     PetscTruth isnull;
573 
574     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
575     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
576     if (isnull) PetscFunctionReturn(0);
577   }
578 
579   if (size == 1) {
580     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
581   } else {
582     /* assemble the entire matrix onto first processor. */
583     Mat         A;
584     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
585     PetscInt    *cols;
586     PetscScalar *vals;
587 
588     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
589     if (!rank) {
590       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
591     } else {
592       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
593     }
594     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
595     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
596     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
597     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
598 
599     /* Copy the matrix ... This isn't the most efficient means,
600        but it's quick for now */
601     A->insertmode = INSERT_VALUES;
602     row = mdn->rstart; m = mdn->A->m;
603     for (i=0; i<m; i++) {
604       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
605       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
606       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
607       row++;
608     }
609 
610     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
611     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
612     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
613     if (!rank) {
614       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
615     }
616     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
617     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
618     ierr = MatDestroy(A);CHKERRQ(ierr);
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatView_MPIDense"
625 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
626 {
627   PetscErrorCode ierr;
628   PetscTruth iascii,isbinary,isdraw,issocket;
629 
630   PetscFunctionBegin;
631 
632   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
633   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
634   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
635   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
636 
637   if (iascii || issocket || isdraw) {
638     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
639   } else if (isbinary) {
640     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
641   } else {
642     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
643   }
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNCT__
648 #define __FUNCT__ "MatGetInfo_MPIDense"
649 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
650 {
651   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
652   Mat          mdn = mat->A;
653   PetscErrorCode ierr;
654   PetscReal    isend[5],irecv[5];
655 
656   PetscFunctionBegin;
657   info->rows_global    = (double)A->M;
658   info->columns_global = (double)A->N;
659   info->rows_local     = (double)A->m;
660   info->columns_local  = (double)A->N;
661   info->block_size     = 1.0;
662   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
663   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
664   isend[3] = info->memory;  isend[4] = info->mallocs;
665   if (flag == MAT_LOCAL) {
666     info->nz_used      = isend[0];
667     info->nz_allocated = isend[1];
668     info->nz_unneeded  = isend[2];
669     info->memory       = isend[3];
670     info->mallocs      = isend[4];
671   } else if (flag == MAT_GLOBAL_MAX) {
672     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
673     info->nz_used      = irecv[0];
674     info->nz_allocated = irecv[1];
675     info->nz_unneeded  = irecv[2];
676     info->memory       = irecv[3];
677     info->mallocs      = irecv[4];
678   } else if (flag == MAT_GLOBAL_SUM) {
679     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
680     info->nz_used      = irecv[0];
681     info->nz_allocated = irecv[1];
682     info->nz_unneeded  = irecv[2];
683     info->memory       = irecv[3];
684     info->mallocs      = irecv[4];
685   }
686   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
687   info->fill_ratio_needed = 0;
688   info->factor_mallocs    = 0;
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatSetOption_MPIDense"
694 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
695 {
696   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
697   PetscErrorCode ierr;
698 
699   PetscFunctionBegin;
700   switch (op) {
701   case MAT_NO_NEW_NONZERO_LOCATIONS:
702   case MAT_YES_NEW_NONZERO_LOCATIONS:
703   case MAT_NEW_NONZERO_LOCATION_ERR:
704   case MAT_NEW_NONZERO_ALLOCATION_ERR:
705   case MAT_COLUMNS_SORTED:
706   case MAT_COLUMNS_UNSORTED:
707     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
708     break;
709   case MAT_ROW_ORIENTED:
710     a->roworiented = PETSC_TRUE;
711     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
712     break;
713   case MAT_ROWS_SORTED:
714   case MAT_ROWS_UNSORTED:
715   case MAT_YES_NEW_DIAGONALS:
716   case MAT_USE_HASH_TABLE:
717     ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
718     break;
719   case MAT_COLUMN_ORIENTED:
720     a->roworiented = PETSC_FALSE;
721     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
722     break;
723   case MAT_IGNORE_OFF_PROC_ENTRIES:
724     a->donotstash = PETSC_TRUE;
725     break;
726   case MAT_NO_NEW_DIAGONALS:
727     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
728   case MAT_SYMMETRIC:
729   case MAT_STRUCTURALLY_SYMMETRIC:
730   case MAT_NOT_SYMMETRIC:
731   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
732   case MAT_HERMITIAN:
733   case MAT_NOT_HERMITIAN:
734   case MAT_SYMMETRY_ETERNAL:
735   case MAT_NOT_SYMMETRY_ETERNAL:
736     break;
737   default:
738     SETERRQ(PETSC_ERR_SUP,"unknown option");
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatDiagonalScale_MPIDense"
746 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
747 {
748   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
749   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
750   PetscScalar  *l,*r,x,*v;
751   PetscErrorCode ierr;
752   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
753 
754   PetscFunctionBegin;
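  /* The local block is column-major: left scaling touches row i with stride m across the
     columns, while right scaling multiplies each contiguous column of length m by the
     corresponding entry of the scattered right-scaling vector. */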
755   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
756   if (ll) {
757     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
758     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
759     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
760     for (i=0; i<m; i++) {
761       x = l[i];
762       v = mat->v + i;
763       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
764     }
765     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
766     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
767   }
768   if (rr) {
769     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
770     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
771     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
772     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
773     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
774     for (i=0; i<n; i++) {
775       x = r[i];
776       v = mat->v + i*m;
777       for (j=0; j<m; j++) { (*v++) *= x;}
778     }
779     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
780     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatNorm_MPIDense"
787 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
788 {
789   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
790   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
791   PetscErrorCode ierr;
792   PetscInt          i,j;
793   PetscReal    sum = 0.0;
794   PetscScalar  *v = mat->v;
795 
796   PetscFunctionBegin;
797   if (mdn->size == 1) {
798     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
799   } else {
800     if (type == NORM_FROBENIUS) {
801       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
802 #if defined(PETSC_USE_COMPLEX)
803         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
804 #else
805         sum += (*v)*(*v); v++;
806 #endif
807       }
808       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
809       *nrm = sqrt(*nrm);
810       ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
811     } else if (type == NORM_1) {
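      /* NORM_1 is the maximum column sum: accumulate local column sums, add them across
         processes, then take the maximum over all global columns. */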
812       PetscReal *tmp,*tmp2;
813       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
814       tmp2 = tmp + A->N;
815       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
816       *nrm = 0.0;
817       v = mat->v;
818       for (j=0; j<mdn->A->n; j++) {
819         for (i=0; i<mdn->A->m; i++) {
820           tmp[j] += PetscAbsScalar(*v);  v++;
821         }
822       }
823       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
824       for (j=0; j<A->N; j++) {
825         if (tmp2[j] > *nrm) *nrm = tmp2[j];
826       }
827       ierr = PetscFree(tmp);CHKERRQ(ierr);
828       ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
829     } else if (type == NORM_INFINITY) { /* max row norm */
830       PetscReal ntemp;
831       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
832       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
833     } else {
834       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
835     }
836   }
837   PetscFunctionReturn(0);
838 }
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatTranspose_MPIDense"
842 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
843 {
844   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
845   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
846   Mat          B;
847   PetscInt          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
848   PetscErrorCode ierr;
849   PetscInt          j,i;
850   PetscScalar  *v;
851 
852   PetscFunctionBegin;
853   if (!matout && M != N) {
854     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
855   }
856   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
857   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
858   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
859   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
860 
861   m = a->A->m; n = a->A->n; v = Aloc->v;
862   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
863   for (i=0; i<m; i++) rwork[i] = rstart + i;
864   for (j=0; j<n; j++) {
865     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
866     v   += m;
867   }
868   ierr = PetscFree(rwork);CHKERRQ(ierr);
869   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
870   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
871   if (matout) {
872     *matout = B;
873   } else {
874     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
875   }
876   PetscFunctionReturn(0);
877 }
878 
879 #include "petscblaslapack.h"
880 #undef __FUNCT__
881 #define __FUNCT__ "MatScale_MPIDense"
882 PetscErrorCode MatScale_MPIDense(const PetscScalar *alpha,Mat inA)
883 {
884   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
885   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
886   PetscBLASInt one = 1,nz = (PetscBLASInt)inA->m*inA->N;
887   PetscErrorCode ierr;
888 
889   PetscFunctionBegin;
890   BLASscal_(&nz,(PetscScalar*)alpha,a->v,&one);
891   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
899 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
900 {
901   PetscErrorCode ierr;
902 
903   PetscFunctionBegin;
904   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
905   PetscFunctionReturn(0);
906 }
907 
908 /* -------------------------------------------------------------------*/
909 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
910        MatGetRow_MPIDense,
911        MatRestoreRow_MPIDense,
912        MatMult_MPIDense,
913 /* 4*/ MatMultAdd_MPIDense,
914        MatMultTranspose_MPIDense,
915        MatMultTransposeAdd_MPIDense,
916        0,
917        0,
918        0,
919 /*10*/ 0,
920        0,
921        0,
922        0,
923        MatTranspose_MPIDense,
924 /*15*/ MatGetInfo_MPIDense,
925        0,
926        MatGetDiagonal_MPIDense,
927        MatDiagonalScale_MPIDense,
928        MatNorm_MPIDense,
929 /*20*/ MatAssemblyBegin_MPIDense,
930        MatAssemblyEnd_MPIDense,
931        0,
932        MatSetOption_MPIDense,
933        MatZeroEntries_MPIDense,
934 /*25*/ MatZeroRows_MPIDense,
935        0,
936        0,
937        0,
938        0,
939 /*30*/ MatSetUpPreallocation_MPIDense,
940        0,
941        0,
942        MatGetArray_MPIDense,
943        MatRestoreArray_MPIDense,
944 /*35*/ MatDuplicate_MPIDense,
945        0,
946        0,
947        0,
948        0,
949 /*40*/ 0,
950        MatGetSubMatrices_MPIDense,
951        0,
952        MatGetValues_MPIDense,
953        0,
954 /*45*/ 0,
955        MatScale_MPIDense,
956        0,
957        0,
958        0,
959 /*50*/ 0,
960        0,
961        0,
962        0,
963        0,
964 /*55*/ 0,
965        0,
966        0,
967        0,
968        0,
969 /*60*/ MatGetSubMatrix_MPIDense,
970        MatDestroy_MPIDense,
971        MatView_MPIDense,
972        MatGetPetscMaps_Petsc,
973        0,
974 /*65*/ 0,
975        0,
976        0,
977        0,
978        0,
979 /*70*/ 0,
980        0,
981        0,
982        0,
983        0,
984 /*75*/ 0,
985        0,
986        0,
987        0,
988        0,
989 /*80*/ 0,
990        0,
991        0,
992        0,
993 /*84*/ MatLoad_MPIDense,
994        0,
995        0,
996        0,
997        0,
998        0,
999 /*90*/ 0,
1000        0,
1001        0,
1002        0,
1003        0,
1004 /*95*/ 0,
1005        0,
1006        0,
1007        0};
1008 
1009 EXTERN_C_BEGIN
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1012 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1013 {
1014   Mat_MPIDense *a;
1015   PetscErrorCode ierr;
1016 
1017   PetscFunctionBegin;
1018   mat->preallocated = PETSC_TRUE;
1019   /* Note:  For now, when data is provided, this assumes the user has correctly
1020      allocated the local dense storage space.  We should add error checking. */
1021 
1022   a    = (Mat_MPIDense*)mat->data;
1023   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1024   ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
1025   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1026   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1027   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 EXTERN_C_END
1031 
1032 /*MC
1033    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1034 
1035    Options Database Keys:
1036 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1037 
1038   Level: beginner
1039 
1040 .seealso: MatCreateMPIDense
1041 M*/
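
/*
   A minimal sketch of creating an MPIDENSE matrix explicitly (m and n are caller-chosen
   local sizes; PETSC_NULL lets PETSc allocate the dense storage):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/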
1042 
1043 EXTERN_C_BEGIN
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatCreate_MPIDense"
1046 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1047 {
1048   Mat_MPIDense *a;
1049   PetscErrorCode ierr;
1050   PetscInt          i;
1051 
1052   PetscFunctionBegin;
1053   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1054   mat->data         = (void*)a;
1055   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1056   mat->factor       = 0;
1057   mat->mapping      = 0;
1058 
1059   a->factor       = 0;
1060   mat->insertmode = NOT_SET_VALUES;
1061   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1062   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1063 
1064   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1065   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1066   a->nvec = mat->n;
1067 
1068   /* the information in the maps duplicates the information computed below, eventually
1069      we should remove the duplicate information that is not contained in the maps */
1070   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1071   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1072 
1073   /* build local table of row and column ownerships */
1074   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1075   a->cowners = a->rowners + a->size + 1;
1076   ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1077   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1078   a->rowners[0] = 0;
1079   for (i=2; i<=a->size; i++) {
1080     a->rowners[i] += a->rowners[i-1];
1081   }
1082   a->rstart = a->rowners[a->rank];
1083   a->rend   = a->rowners[a->rank+1];
1084   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1085   a->cowners[0] = 0;
1086   for (i=2; i<=a->size; i++) {
1087     a->cowners[i] += a->cowners[i-1];
1088   }
1089 
1090   /* build cache for off-processor entries generated during MatSetValues() */
1091   a->donotstash = PETSC_FALSE;
1092   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1093 
1094   /* stuff used for matrix vector multiply */
1095   a->lvec        = 0;
1096   a->Mvctx       = 0;
1097   a->roworiented = PETSC_TRUE;
1098 
1099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1100                                      "MatGetDiagonalBlock_MPIDense",
1101                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1103                                      "MatMPIDenseSetPreallocation_MPIDense",
1104                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 EXTERN_C_END
1108 
1109 /*MC
1110    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1111 
1112    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1113    and MATMPIDENSE otherwise.
1114 
1115    Options Database Keys:
1116 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1117 
1118   Level: beginner
1119 
1120 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1121 M*/
1122 
1123 EXTERN_C_BEGIN
1124 #undef __FUNCT__
1125 #define __FUNCT__ "MatCreate_Dense"
1126 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1127 {
1128   PetscErrorCode ierr;
1129   PetscMPIInt    size;
1130 
1131   PetscFunctionBegin;
1132   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1133   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1134   if (size == 1) {
1135     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1136   } else {
1137     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1138   }
1139   PetscFunctionReturn(0);
1140 }
1141 EXTERN_C_END
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1145 /*@C
1146    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1147 
1148    Not collective
1149 
1150    Input Parameters:
1151 .  A - the matrix
1152 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1153    to control all matrix memory allocation.
1154 
1155    Notes:
1156    The dense format is fully compatible with standard Fortran 77
1157    storage by columns.
1158 
1159    The data input variable is intended primarily for Fortran programmers
1160    who wish to allocate their own matrix memory space.  Most users should
1161    set data=PETSC_NULL.
1162 
1163    Level: intermediate
1164 
1165 .keywords: matrix,dense, parallel
1166 
1167 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1168 @*/
1169 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1170 {
1171   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1172 
1173   PetscFunctionBegin;
1174   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1175   if (f) {
1176     ierr = (*f)(mat,data);CHKERRQ(ierr);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatCreateMPIDense"
1183 /*@C
1184    MatCreateMPIDense - Creates a parallel matrix in dense format.
1185 
1186    Collective on MPI_Comm
1187 
1188    Input Parameters:
1189 +  comm - MPI communicator
1190 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1191 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1192 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1193 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1194 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1195    to control all matrix memory allocation.
1196 
1197    Output Parameter:
1198 .  A - the matrix
1199 
1200    Notes:
1201    The dense format is fully compatible with standard Fortran 77
1202    storage by columns.
1203 
1204    The data input variable is intended primarily for Fortran programmers
1205    who wish to allocate their own matrix memory space.  Most users should
1206    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1207 
1208    The user MUST specify either the local or global matrix dimensions
1209    (possibly both).
1210 
1211    Level: intermediate
1212 
1213 .keywords: matrix,dense, parallel
1214 
1215 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1216 @*/
1217 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1218 {
1219   PetscErrorCode ierr;
1220   PetscMPIInt    size;
1221 
1222   PetscFunctionBegin;
1223   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1224   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1225   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1226   if (size > 1) {
1227     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1228     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1229   } else {
1230     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1231     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1232   }
1233   PetscFunctionReturn(0);
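
/*
   A minimal usage sketch for MatCreateMPIDense(); m, n, row, col and value are
   caller-supplied placeholders and PETSC_NULL lets PETSc allocate the storage:

      Mat         A;
      PetscInt    row,col;
      PetscScalar value;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,m,n,PETSC_DECIDE,PETSC_DECIDE,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/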
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatDuplicate_MPIDense"
1238 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1239 {
1240   Mat          mat;
1241   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1242   PetscErrorCode ierr;
1243 
1244   PetscFunctionBegin;
1245   *newmat       = 0;
1246   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1247   ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
1248   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1249   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1250   mat->data         = (void*)a;
1251   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1252   mat->factor       = A->factor;
1253   mat->assembled    = PETSC_TRUE;
1254   mat->preallocated = PETSC_TRUE;
1255 
1256   a->rstart       = oldmat->rstart;
1257   a->rend         = oldmat->rend;
1258   a->size         = oldmat->size;
1259   a->rank         = oldmat->rank;
1260   mat->insertmode = NOT_SET_VALUES;
1261   a->nvec         = oldmat->nvec;
1262   a->donotstash   = oldmat->donotstash;
1263   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1264   ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1265   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1266   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1267 
1268   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1269   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1270   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1271   *newmat = mat;
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #include "petscsys.h"
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1279 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1280 {
1281   PetscErrorCode ierr;
1282   PetscMPIInt    rank,size;
1283   PetscInt            *rowners,i,m,nz,j;
1284   PetscScalar    *array,*vals,*vals_ptr;
1285   MPI_Status     status;
1286 
1287   PetscFunctionBegin;
1288   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1289   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1290 
1291   /* determine ownership of all rows */
1292   m          = M/size + ((M % size) > rank);
1293   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1294   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1295   rowners[0] = 0;
1296   for (i=2; i<=size; i++) {
1297     rowners[i] += rowners[i-1];
1298   }
1299 
1300   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1301   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1302   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1303   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1304   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1305 
1306   if (!rank) {
1307     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1308 
1309     /* read in my part of the matrix numerical values  */
1310     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1311 
1312     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1313     vals_ptr = vals;
1314     for (i=0; i<m; i++) {
1315       for (j=0; j<N; j++) {
1316         array[i + j*m] = *vals_ptr++;
1317       }
1318     }
1319 
1320     /* read in the other processors' parts and ship them out */
1321     for (i=1; i<size; i++) {
1322       nz   = (rowners[i+1] - rowners[i])*N;
1323       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1324       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1325     }
1326   } else {
1327     /* receive numeric values */
1328     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1329 
1330     /* receive message of values*/
1331     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1332 
1333     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1334     vals_ptr = vals;
1335     for (i=0; i<m; i++) {
1336       for (j=0; j<N; j++) {
1337         array[i + j*m] = *vals_ptr++;
1338       }
1339     }
1340   }
1341   ierr = PetscFree(rowners);CHKERRQ(ierr);
1342   ierr = PetscFree(vals);CHKERRQ(ierr);
1343   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1344   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatLoad_MPIDense"
1350 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1351 {
1352   Mat            A;
1353   PetscScalar    *vals,*svals;
1354   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1355   MPI_Status     status;
1356   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1357   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1358   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1359   PetscInt       i,nz,j,rstart,rend;
1360   int            fd;
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
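  /* Rank 0 reads the binary header, row lengths, column indices and values from the file
     and ships each process its share with point-to-point messages; every process then
     inserts its rows with MatSetValues() and assembles the matrix. */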
1364   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1365   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1366   if (!rank) {
1367     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1368     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1369     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1370   }
1371 
1372   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1373   M = header[1]; N = header[2]; nz = header[3];
1374 
1375   /*
1376        Handle case where matrix is stored on disk as a dense matrix
1377   */
1378   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1379     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1380     PetscFunctionReturn(0);
1381   }
1382 
1383   /* determine ownership of all rows */
1384   m          = M/size + ((M % size) > rank);
1385   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1386   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1387   rowners[0] = 0;
1388   for (i=2; i<=size; i++) {
1389     rowners[i] += rowners[i-1];
1390   }
1391   rstart = rowners[rank];
1392   rend   = rowners[rank+1];
1393 
1394   /* distribute row lengths to all processors */
1395   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1396   offlens = ourlens + (rend-rstart);
1397   if (!rank) {
1398     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1399     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1400     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1401     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1402     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1403     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1404   } else {
1405     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1406   }
1407 
1408   if (!rank) {
1409     /* calculate the number of nonzeros on each processor */
1410     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1411     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1412     for (i=0; i<size; i++) {
1413       for (j=rowners[i]; j< rowners[i+1]; j++) {
1414         procsnz[i] += rowlengths[j];
1415       }
1416     }
1417     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1418 
1419     /* determine max buffer needed and allocate it */
1420     maxnz = 0;
1421     for (i=0; i<size; i++) {
1422       maxnz = PetscMax(maxnz,procsnz[i]);
1423     }
1424     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1425 
1426     /* read in my part of the matrix column indices  */
1427     nz = procsnz[0];
1428     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1429     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1430 
1431     /* read in everyone else's part and ship it off */
1432     for (i=1; i<size; i++) {
1433       nz   = procsnz[i];
1434       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1435       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1436     }
1437     ierr = PetscFree(cols);CHKERRQ(ierr);
1438   } else {
1439     /* determine buffer space needed for message */
1440     nz = 0;
1441     for (i=0; i<m; i++) {
1442       nz += ourlens[i];
1443     }
1444     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1445 
1446     /* receive message of column indices*/
1447     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1448     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1449     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1450   }
1451 
1452   /* loop over local rows, determining number of off diagonal entries */
1453   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1454   jj = 0;
1455   for (i=0; i<m; i++) {
1456     for (j=0; j<ourlens[i]; j++) {
1457       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1458       jj++;
1459     }
1460   }
1461 
1462   /* create our matrix */
1463   for (i=0; i<m; i++) {
1464     ourlens[i] -= offlens[i];
1465   }
1466   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1467   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1468   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1469   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1470   A = *newmat;
1471   for (i=0; i<m; i++) {
1472     ourlens[i] += offlens[i];
1473   }
1474 
1475   if (!rank) {
1476     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1477 
1478     /* read in my part of the matrix numerical values  */
1479     nz = procsnz[0];
1480     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1481 
1482     /* insert into matrix */
1483     jj      = rstart;
1484     smycols = mycols;
1485     svals   = vals;
1486     for (i=0; i<m; i++) {
1487       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1488       smycols += ourlens[i];
1489       svals   += ourlens[i];
1490       jj++;
1491     }
1492 
1493     /* read in the other processors' parts and ship them out */
1494     for (i=1; i<size; i++) {
1495       nz   = procsnz[i];
1496       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1497       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1498     }
1499     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1500   } else {
1501     /* receive numeric values */
1502     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1503 
1504     /* receive message of values*/
1505     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1506     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1507     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1508 
1509     /* insert into matrix */
1510     jj      = rstart;
1511     smycols = mycols;
1512     svals   = vals;
1513     for (i=0; i<m; i++) {
1514       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1515       smycols += ourlens[i];
1516       svals   += ourlens[i];
1517       jj++;
1518     }
1519   }
1520   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1521   ierr = PetscFree(vals);CHKERRQ(ierr);
1522   ierr = PetscFree(mycols);CHKERRQ(ierr);
1523   ierr = PetscFree(rowners);CHKERRQ(ierr);
1524 
1525   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1526   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 
1531 
1532 
1533 
1534