xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision e68848bdcd5ce88113591298ad148f80b60bb1d6)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatDenseGetLocalMatrix"
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
14               matrix that stores the local portion of the operator. For a sequential matrix it returns the matrix itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
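      Example usage (a minimal sketch; the variable names are illustrative and
      error checking with CHKERRQ is omitted):
.vb
      Mat A,Alocal;
      /* ... A has been created as a MATMPIDENSE (or MATSEQDENSE) matrix ... */
      MatDenseGetLocalMatrix(A,&Alocal);
      /* Alocal is the process-local sequential dense matrix; it is not a copy,
         so the caller must not destroy it */
.ve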
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscTruth     flg;
30   PetscMPIInt    size;
31 
32   PetscFunctionBegin;
33   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (!flg) {  /* this check is awkward: a MATDENSE matrix on one process is really a MATSEQDENSE */
35     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
36     if (flg) {
37       ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
38       if (size == 1) flg = PETSC_FALSE;
39     }
40   }
41   if (flg) {
42     *B = mat->A;
43   } else {
44     *B = A;
45   }
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "MatGetRow_MPIDense"
51 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
54   PetscErrorCode ierr;
55   PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;
56 
57   PetscFunctionBegin;
58   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
59   lrow = row - rstart;
60   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 
64 #undef __FUNCT__
65 #define __FUNCT__ "MatRestoreRow_MPIDense"
66 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
67 {
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
72   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
73   PetscFunctionReturn(0);
74 }
75 
76 EXTERN_C_BEGIN
77 #undef __FUNCT__
78 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
79 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
80 {
81   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
82   PetscErrorCode ierr;
83   PetscInt       m = A->m,rstart = mdn->rstart;
84   PetscScalar    *array;
85   MPI_Comm       comm;
86 
87   PetscFunctionBegin;
88   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
89 
90   /* The reuse aspect is not implemented efficiently */
91   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
92 
93   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
94   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
95   ierr = MatCreate(comm,B);CHKERRQ(ierr);
96   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
97   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
98   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
99   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
100   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
101   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
102 
103   *iscopy = PETSC_TRUE;
104   PetscFunctionReturn(0);
105 }
106 EXTERN_C_END
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "MatSetValues_MPIDense"
110 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
111 {
112   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
113   PetscErrorCode ierr;
114   PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
115   PetscTruth     roworiented = A->roworiented;
116 
117   PetscFunctionBegin;
118   for (i=0; i<m; i++) {
119     if (idxm[i] < 0) continue;
120     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
121     if (idxm[i] >= rstart && idxm[i] < rend) {
122       row = idxm[i] - rstart;
123       if (roworiented) {
124         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
125       } else {
126         for (j=0; j<n; j++) {
127           if (idxn[j] < 0) continue;
128           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
129           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
130         }
131       }
132     } else {
133       if (!A->donotstash) {
134         if (roworiented) {
135           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
136         } else {
137           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
138         }
139       }
140     }
141   }
142   PetscFunctionReturn(0);
143 }
144 
145 #undef __FUNCT__
146 #define __FUNCT__ "MatGetValues_MPIDense"
147 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
148 {
149   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
150   PetscErrorCode ierr;
151   PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;
152 
153   PetscFunctionBegin;
154   for (i=0; i<m; i++) {
155     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
156     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
157     if (idxm[i] >= rstart && idxm[i] < rend) {
158       row = idxm[i] - rstart;
159       for (j=0; j<n; j++) {
160         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
161         if (idxn[j] >= mat->N) {
162           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
163         }
164         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
165       }
166     } else {
167       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
168     }
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatGetArray_MPIDense"
175 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
176 {
177   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
187 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
188 {
189   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
190   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
191   PetscErrorCode ierr;
192   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
193   PetscScalar    *av,*bv,*v = lmat->v;
194   Mat            newmat;
195 
196   PetscFunctionBegin;
197   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
198   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
199   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
200   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
201 
202   /* No parallel redistribution is currently supported! We should really check each index set
203      to confirm that it is OK. ... Currently only submatrices with the same row partitioning as the
204      original matrix are supported. */
205 
206   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
207   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
208 
209   /* Check submatrix call */
210   if (scall == MAT_REUSE_MATRIX) {
211     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
212     /* Really need to test rows and column sizes! */
213     newmat = *B;
214   } else {
215     /* Create and fill new matrix */
216     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
217     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
218     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
219     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
220   }
221 
222   /* Now extract the data pointers and do the copy, column at a time */
223   newmatd = (Mat_MPIDense*)newmat->data;
224   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
225 
226   for (i=0; i<ncols; i++) {
227     av = v + nlrows*icol[i];
228     for (j=0; j<nrows; j++) {
229       *bv++ = av[irow[j] - rstart];
230     }
231   }
232 
233   /* Assemble the matrices so that the correct flags are set */
234   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
235   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
236 
237   /* Free work space */
238   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
239   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
240   *B = newmat;
241   PetscFunctionReturn(0);
242 }
243 
244 #undef __FUNCT__
245 #define __FUNCT__ "MatRestoreArray_MPIDense"
246 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
247 {
248   PetscFunctionBegin;
249   PetscFunctionReturn(0);
250 }
251 
252 #undef __FUNCT__
253 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
254 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
255 {
256   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
257   MPI_Comm       comm = mat->comm;
258   PetscErrorCode ierr;
259   PetscInt       nstash,reallocs;
260   InsertMode     addv;
261 
262   PetscFunctionBegin;
263   /* make sure all processors are either in INSERTMODE or ADDMODE */
264   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
265   if (addv == (ADD_VALUES|INSERT_VALUES)) {
266     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
267   }
268   mat->insertmode = addv; /* in case this processor had no cache */
269 
270   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
271   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
272   ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
278 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
279 {
280   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
281   PetscErrorCode  ierr;
282   PetscInt        i,*row,*col,flg,j,rstart,ncols;
283   PetscMPIInt     n;
284   PetscScalar     *val;
285   InsertMode      addv=mat->insertmode;
286 
287   PetscFunctionBegin;
288   /*  wait on receives */
289   while (1) {
290     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
291     if (!flg) break;
292 
293     for (i=0; i<n;) {
294       /* Now identify the consecutive vals belonging to the same row */
295       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
296       if (j < n) ncols = j-i;
297       else       ncols = n-i;
298       /* Now assemble all these values with a single function call */
299       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
300       i = j;
301     }
302   }
303   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
304 
305   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
306   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
307 
308   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
309     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNCT__
315 #define __FUNCT__ "MatZeroEntries_MPIDense"
316 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
317 {
318   PetscErrorCode ierr;
319   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
320 
321   PetscFunctionBegin;
322   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
323   PetscFunctionReturn(0);
324 }
325 
326 /* the code does not do the diagonal entries correctly unless the
327    matrix is square and the column and row ownerships are identical.
328    This is a BUG. The only way to fix it seems to be to access
329    mdn->A and mdn->B directly and not through the MatZeroRows()
330    routine.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "MatZeroRows_MPIDense"
334 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
335 {
336   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
337   PetscErrorCode ierr;
338   PetscInt       i,*owners = l->rowners;
339   PetscInt       *nprocs,j,idx,nsends;
340   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
341   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
342   PetscInt       *lens,*lrows,*values;
343   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
344   MPI_Comm       comm = A->comm;
345   MPI_Request    *send_waits,*recv_waits;
346   MPI_Status     recv_status,*send_status;
347   PetscTruth     found;
348 
349   PetscFunctionBegin;
350   /*  first count number of contributors to each processor */
351   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
352   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
353   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
354   for (i=0; i<N; i++) {
355     idx = rows[i];
356     found = PETSC_FALSE;
357     for (j=0; j<size; j++) {
358       if (idx >= owners[j] && idx < owners[j+1]) {
359         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
360       }
361     }
362     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
363   }
364   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
365 
366   /* inform other processors of number of messages and max length*/
367   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
368 
369   /* post receives:   */
370   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
371   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
372   for (i=0; i<nrecvs; i++) {
373     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
374   }
375 
376   /* do sends:
377       1) starts[i] gives the starting index in svalues for stuff going to
378          the ith processor
379   */
380   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
381   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
382   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
383   starts[0]  = 0;
384   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
385   for (i=0; i<N; i++) {
386     svalues[starts[owner[i]]++] = rows[i];
387   }
388 
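  /* the packing loop above advanced each starts[] entry past its block,
     so rebuild the offsets before posting the sends */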
389   starts[0] = 0;
390   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
391   count = 0;
392   for (i=0; i<size; i++) {
393     if (nprocs[2*i+1]) {
394       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
395     }
396   }
397   ierr = PetscFree(starts);CHKERRQ(ierr);
398 
399   base = owners[rank];
400 
401   /*  wait on receives */
402   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
403   source = lens + nrecvs;
404   count  = nrecvs; slen = 0;
405   while (count) {
406     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
407     /* unpack receives into our local space */
408     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
409     source[imdex]  = recv_status.MPI_SOURCE;
410     lens[imdex]    = n;
411     slen += n;
412     count--;
413   }
414   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
415 
416   /* move the data into the send scatter */
417   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
418   count = 0;
419   for (i=0; i<nrecvs; i++) {
420     values = rvalues + i*nmax;
421     for (j=0; j<lens[i]; j++) {
422       lrows[count++] = values[j] - base;
423     }
424   }
425   ierr = PetscFree(rvalues);CHKERRQ(ierr);
426   ierr = PetscFree(lens);CHKERRQ(ierr);
427   ierr = PetscFree(owner);CHKERRQ(ierr);
428   ierr = PetscFree(nprocs);CHKERRQ(ierr);
429 
430   /* actually zap the local rows */
431   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
432   ierr = PetscFree(lrows);CHKERRQ(ierr);
433 
434   /* wait on sends */
435   if (nsends) {
436     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
437     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
438     ierr = PetscFree(send_status);CHKERRQ(ierr);
439   }
440   ierr = PetscFree(send_waits);CHKERRQ(ierr);
441   ierr = PetscFree(svalues);CHKERRQ(ierr);
442 
443   PetscFunctionReturn(0);
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatMult_MPIDense"
448 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
449 {
450   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
451   PetscErrorCode ierr;
452 
453   PetscFunctionBegin;
454   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
455   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
456   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatMultAdd_MPIDense"
462 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
463 {
464   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
465   PetscErrorCode ierr;
466 
467   PetscFunctionBegin;
468   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
469   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
470   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatMultTranspose_MPIDense"
476 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
477 {
478   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
479   PetscErrorCode ierr;
480   PetscScalar    zero = 0.0;
481 
482   PetscFunctionBegin;
483   ierr = VecSet(yy,zero);CHKERRQ(ierr);
484   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
485   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
486   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
487   PetscFunctionReturn(0);
488 }
489 
490 #undef __FUNCT__
491 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
492 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
493 {
494   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
495   PetscErrorCode ierr;
496 
497   PetscFunctionBegin;
498   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
499   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
500   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
501   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatGetDiagonal_MPIDense"
507 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
508 {
509   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
510   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
511   PetscErrorCode ierr;
512   PetscInt       len,i,n,m = A->m,radd;
513   PetscScalar    *x,zero = 0.0;
514 
515   PetscFunctionBegin;
516   ierr = VecSet(v,zero);CHKERRQ(ierr);
517   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
518   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
519   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
520   len  = PetscMin(a->A->m,a->A->n);
521   radd = a->rstart*m;
522   for (i=0; i<len; i++) {
523     x[i] = aloc->v[radd + i*m + i];
524   }
525   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 #undef __FUNCT__
530 #define __FUNCT__ "MatDestroy_MPIDense"
531 PetscErrorCode MatDestroy_MPIDense(Mat mat)
532 {
533   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
534   PetscErrorCode ierr;
535 
536   PetscFunctionBegin;
537 
538 #if defined(PETSC_USE_LOG)
539   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
540 #endif
541   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
542   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
543   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
544   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
545   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
546   if (mdn->factor) {
547     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
548     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
549     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
550     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
551   }
552   ierr = PetscFree(mdn);CHKERRQ(ierr);
553   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
554   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatView_MPIDense_Binary"
560 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
561 {
562   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
563   PetscErrorCode ierr;
564 
565   PetscFunctionBegin;
566   if (mdn->size == 1) {
567     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
568   }
569   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
570   PetscFunctionReturn(0);
571 }
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
575 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
576 {
577   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
578   PetscErrorCode    ierr;
579   PetscMPIInt       size = mdn->size,rank = mdn->rank;
580   PetscViewerType   vtype;
581   PetscTruth        iascii,isdraw;
582   PetscViewer       sviewer;
583   PetscViewerFormat format;
584 
585   PetscFunctionBegin;
586   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
587   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
588   if (iascii) {
589     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
590     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
591     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
592       MatInfo info;
593       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
594       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
595                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
596       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
597       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
598       PetscFunctionReturn(0);
599     } else if (format == PETSC_VIEWER_ASCII_INFO) {
600       PetscFunctionReturn(0);
601     }
602   } else if (isdraw) {
603     PetscDraw       draw;
604     PetscTruth isnull;
605 
606     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
607     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
608     if (isnull) PetscFunctionReturn(0);
609   }
610 
611   if (size == 1) {
612     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
613   } else {
614     /* assemble the entire matrix onto first processor. */
615     Mat         A;
616     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
617     PetscInt    *cols;
618     PetscScalar *vals;
619 
620     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
621     if (!rank) {
622       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
623     } else {
624       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
625     }
626     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
627     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
628     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
629     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
630 
631     /* Copy the matrix ... This isn't the most efficient means,
632        but it's quick for now */
633     A->insertmode = INSERT_VALUES;
634     row = mdn->rstart; m = mdn->A->m;
635     for (i=0; i<m; i++) {
636       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
637       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
638       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
639       row++;
640     }
641 
642     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
643     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
644     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
645     if (!rank) {
646       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
647     }
648     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
649     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
650     ierr = MatDestroy(A);CHKERRQ(ierr);
651   }
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNCT__
656 #define __FUNCT__ "MatView_MPIDense"
657 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
658 {
659   PetscErrorCode ierr;
660   PetscTruth     iascii,isbinary,isdraw,issocket;
661 
662   PetscFunctionBegin;
663 
664   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
667   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
668 
669   if (iascii || issocket || isdraw) {
670     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
671   } else if (isbinary) {
672     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
673   } else {
674     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatGetInfo_MPIDense"
681 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
682 {
683   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
684   Mat            mdn = mat->A;
685   PetscErrorCode ierr;
686   PetscReal      isend[5],irecv[5];
687 
688   PetscFunctionBegin;
689   info->rows_global    = (double)A->M;
690   info->columns_global = (double)A->N;
691   info->rows_local     = (double)A->m;
692   info->columns_local  = (double)A->N;
693   info->block_size     = 1.0;
694   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
695   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
696   isend[3] = info->memory;  isend[4] = info->mallocs;
697   if (flag == MAT_LOCAL) {
698     info->nz_used      = isend[0];
699     info->nz_allocated = isend[1];
700     info->nz_unneeded  = isend[2];
701     info->memory       = isend[3];
702     info->mallocs      = isend[4];
703   } else if (flag == MAT_GLOBAL_MAX) {
704     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
705     info->nz_used      = irecv[0];
706     info->nz_allocated = irecv[1];
707     info->nz_unneeded  = irecv[2];
708     info->memory       = irecv[3];
709     info->mallocs      = irecv[4];
710   } else if (flag == MAT_GLOBAL_SUM) {
711     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
712     info->nz_used      = irecv[0];
713     info->nz_allocated = irecv[1];
714     info->nz_unneeded  = irecv[2];
715     info->memory       = irecv[3];
716     info->mallocs      = irecv[4];
717   }
718   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
719   info->fill_ratio_needed = 0;
720   info->factor_mallocs    = 0;
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "MatSetOption_MPIDense"
726 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
727 {
728   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
729   PetscErrorCode ierr;
730 
731   PetscFunctionBegin;
732   switch (op) {
733   case MAT_NO_NEW_NONZERO_LOCATIONS:
734   case MAT_YES_NEW_NONZERO_LOCATIONS:
735   case MAT_NEW_NONZERO_LOCATION_ERR:
736   case MAT_NEW_NONZERO_ALLOCATION_ERR:
737   case MAT_COLUMNS_SORTED:
738   case MAT_COLUMNS_UNSORTED:
739     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
740     break;
741   case MAT_ROW_ORIENTED:
742     a->roworiented = PETSC_TRUE;
743     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
744     break;
745   case MAT_ROWS_SORTED:
746   case MAT_ROWS_UNSORTED:
747   case MAT_YES_NEW_DIAGONALS:
748   case MAT_USE_HASH_TABLE:
749     ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
750     break;
751   case MAT_COLUMN_ORIENTED:
752     a->roworiented = PETSC_FALSE;
753     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
754     break;
755   case MAT_IGNORE_OFF_PROC_ENTRIES:
756     a->donotstash = PETSC_TRUE;
757     break;
758   case MAT_NO_NEW_DIAGONALS:
759     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
760   case MAT_SYMMETRIC:
761   case MAT_STRUCTURALLY_SYMMETRIC:
762   case MAT_NOT_SYMMETRIC:
763   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
764   case MAT_HERMITIAN:
765   case MAT_NOT_HERMITIAN:
766   case MAT_SYMMETRY_ETERNAL:
767   case MAT_NOT_SYMMETRY_ETERNAL:
768     break;
769   default:
770     SETERRQ(PETSC_ERR_SUP,"unknown option");
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "MatDiagonalScale_MPIDense"
778 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
779 {
780   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
781   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
782   PetscScalar    *l,*r,x,*v;
783   PetscErrorCode ierr;
784   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
785 
786   PetscFunctionBegin;
787   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
788   if (ll) {
789     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
790     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
791     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
792     for (i=0; i<m; i++) {
793       x = l[i];
794       v = mat->v + i;
795       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
796     }
797     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
798     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
799   }
800   if (rr) {
801     ierr = VecGetSize(rr,&s3a);CHKERRQ(ierr);
802     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
803     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
804     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
805     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
806     for (i=0; i<n; i++) {
807       x = r[i];
808       v = mat->v + i*m;
809       for (j=0; j<m; j++) { (*v++) *= x;}
810     }
811     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
812     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
813   }
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "MatNorm_MPIDense"
819 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
820 {
821   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
822   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
823   PetscErrorCode ierr;
824   PetscInt       i,j;
825   PetscReal      sum = 0.0;
826   PetscScalar    *v = mat->v;
827 
828   PetscFunctionBegin;
829   if (mdn->size == 1) {
830     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
831   } else {
832     if (type == NORM_FROBENIUS) {
833       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
834 #if defined(PETSC_USE_COMPLEX)
835         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
836 #else
837         sum += (*v)*(*v); v++;
838 #endif
839       }
840       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
841       *nrm = sqrt(*nrm);
842       ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
843     } else if (type == NORM_1) {
844       PetscReal *tmp,*tmp2;
845       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
846       tmp2 = tmp + A->N;
847       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
848       *nrm = 0.0;
849       v = mat->v;
850       for (j=0; j<mdn->A->n; j++) {
851         for (i=0; i<mdn->A->m; i++) {
852           tmp[j] += PetscAbsScalar(*v);  v++;
853         }
854       }
855       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
856       for (j=0; j<A->N; j++) {
857         if (tmp2[j] > *nrm) *nrm = tmp2[j];
858       }
859       ierr = PetscFree(tmp);CHKERRQ(ierr);
860       ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
861     } else if (type == NORM_INFINITY) { /* max row norm */
862       PetscReal ntemp;
863       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
864       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
865     } else {
866       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
867     }
868   }
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatTranspose_MPIDense"
874 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
875 {
876   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
877   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
878   Mat            B;
879   PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
880   PetscErrorCode ierr;
881   PetscInt       j,i;
882   PetscScalar    *v;
883 
884   PetscFunctionBegin;
885   if (!matout && M != N) {
886     SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
887   }
888   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
889   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
890   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
891   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
892 
893   m = a->A->m; n = a->A->n; v = Aloc->v;
894   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr); /* one global row index per local row */
895   for (j=0; j<n; j++) {
896     for (i=0; i<m; i++) rwork[i] = rstart + i;
897     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
898     v   += m;
899   }
900   ierr = PetscFree(rwork);CHKERRQ(ierr);
901   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
902   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
903   if (matout) {
904     *matout = B;
905   } else {
906     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
907   }
908   PetscFunctionReturn(0);
909 }
910 
911 #include "petscblaslapack.h"
912 #undef __FUNCT__
913 #define __FUNCT__ "MatScale_MPIDense"
914 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
915 {
916   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
917   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
918   PetscScalar    oalpha = alpha;
919   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
920   PetscErrorCode ierr;
921 
922   PetscFunctionBegin;
923   BLASscal_(&nz,&oalpha,a->v,&one);
924   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
932 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
933 {
934   PetscErrorCode ierr;
935 
936   PetscFunctionBegin;
937   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
938   PetscFunctionReturn(0);
939 }
940 
941 /* -------------------------------------------------------------------*/
942 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
943        MatGetRow_MPIDense,
944        MatRestoreRow_MPIDense,
945        MatMult_MPIDense,
946 /* 4*/ MatMultAdd_MPIDense,
947        MatMultTranspose_MPIDense,
948        MatMultTransposeAdd_MPIDense,
949        0,
950        0,
951        0,
952 /*10*/ 0,
953        0,
954        0,
955        0,
956        MatTranspose_MPIDense,
957 /*15*/ MatGetInfo_MPIDense,
958        0,
959        MatGetDiagonal_MPIDense,
960        MatDiagonalScale_MPIDense,
961        MatNorm_MPIDense,
962 /*20*/ MatAssemblyBegin_MPIDense,
963        MatAssemblyEnd_MPIDense,
964        0,
965        MatSetOption_MPIDense,
966        MatZeroEntries_MPIDense,
967 /*25*/ MatZeroRows_MPIDense,
968        0,
969        0,
970        0,
971        0,
972 /*30*/ MatSetUpPreallocation_MPIDense,
973        0,
974        0,
975        MatGetArray_MPIDense,
976        MatRestoreArray_MPIDense,
977 /*35*/ MatDuplicate_MPIDense,
978        0,
979        0,
980        0,
981        0,
982 /*40*/ 0,
983        MatGetSubMatrices_MPIDense,
984        0,
985        MatGetValues_MPIDense,
986        0,
987 /*45*/ 0,
988        MatScale_MPIDense,
989        0,
990        0,
991        0,
992 /*50*/ 0,
993        0,
994        0,
995        0,
996        0,
997 /*55*/ 0,
998        0,
999        0,
1000        0,
1001        0,
1002 /*60*/ MatGetSubMatrix_MPIDense,
1003        MatDestroy_MPIDense,
1004        MatView_MPIDense,
1005        MatGetPetscMaps_Petsc,
1006        0,
1007 /*65*/ 0,
1008        0,
1009        0,
1010        0,
1011        0,
1012 /*70*/ 0,
1013        0,
1014        0,
1015        0,
1016        0,
1017 /*75*/ 0,
1018        0,
1019        0,
1020        0,
1021        0,
1022 /*80*/ 0,
1023        0,
1024        0,
1025        0,
1026 /*84*/ MatLoad_MPIDense,
1027        0,
1028        0,
1029        0,
1030        0,
1031        0,
1032 /*90*/ 0,
1033        0,
1034        0,
1035        0,
1036        0,
1037 /*95*/ 0,
1038        0,
1039        0,
1040        0};
1041 
1042 EXTERN_C_BEGIN
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1045 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1046 {
1047   Mat_MPIDense   *a;
1048   PetscErrorCode ierr;
1049 
1050   PetscFunctionBegin;
1051   mat->preallocated = PETSC_TRUE;
1052   /* Note: for now, when a data pointer is provided, this assumes the user has correctly
1053    allocated the local dense storage space.  We should add error checking. */
1054 
1055   a    = (Mat_MPIDense*)mat->data;
1056   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1057   ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
1058   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1059   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1060   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 EXTERN_C_END
1064 
1065 /*MC
1066    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1067 
1068    Options Database Keys:
1069 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1070 
1071   Level: beginner
1072 
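   Example usage (a minimal sketch; comm, m, n, M, N are assumed to be set by the
   caller and error checking is omitted):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,PETSC_NULL);  /* let PETSc allocate the local storage */
.ve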
1073 .seealso: MatCreateMPIDense
1074 M*/
1075 
1076 EXTERN_C_BEGIN
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatCreate_MPIDense"
1079 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1080 {
1081   Mat_MPIDense   *a;
1082   PetscErrorCode ierr;
1083   PetscInt       i;
1084 
1085   PetscFunctionBegin;
1086   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1087   mat->data         = (void*)a;
1088   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1089   mat->factor       = 0;
1090   mat->mapping      = 0;
1091 
1092   a->factor       = 0;
1093   mat->insertmode = NOT_SET_VALUES;
1094   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1095   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1096 
1097   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1098   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1099   a->nvec = mat->n;
1100 
1101   /* the information in the maps duplicates the information computed below, eventually
1102      we should remove the duplicate information that is not contained in the maps */
1103   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1104   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1105 
1106   /* build local table of row and column ownerships */
1107   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1108   a->cowners = a->rowners + a->size + 1;
1109   ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1110   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1111   a->rowners[0] = 0;
1112   for (i=2; i<=a->size; i++) {
1113     a->rowners[i] += a->rowners[i-1];
1114   }
1115   a->rstart = a->rowners[a->rank];
1116   a->rend   = a->rowners[a->rank+1];
1117   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1118   a->cowners[0] = 0;
1119   for (i=2; i<=a->size; i++) {
1120     a->cowners[i] += a->cowners[i-1];
1121   }
1122 
1123   /* build cache for off array entries formed */
1124   a->donotstash = PETSC_FALSE;
1125   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1126 
1127   /* stuff used for matrix vector multiply */
1128   a->lvec        = 0;
1129   a->Mvctx       = 0;
1130   a->roworiented = PETSC_TRUE;
1131 
1132   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1133                                      "MatGetDiagonalBlock_MPIDense",
1134                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1136                                      "MatMPIDenseSetPreallocation_MPIDense",
1137                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 EXTERN_C_END
1141 
1142 /*MC
1143    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1144 
1145    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1146    and MATMPIDENSE otherwise.
1147 
1148    Options Database Keys:
1149 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1150 
1151   Level: beginner
1152 
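   Example usage (a minimal sketch; comm, M, N are assumed to be set by the caller
   and error checking is omitted):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATDENSE);  /* becomes MATSEQDENSE on one process, MATMPIDENSE otherwise */
.ve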
1153 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1154 M*/
1155 
1156 EXTERN_C_BEGIN
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatCreate_Dense"
1159 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1160 {
1161   PetscErrorCode ierr;
1162   PetscMPIInt    size;
1163 
1164   PetscFunctionBegin;
1165   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1166   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1167   if (size == 1) {
1168     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1169   } else {
1170     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1171   }
1172   PetscFunctionReturn(0);
1173 }
1174 EXTERN_C_END
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1178 /*@C
1179    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1180 
1181    Not collective
1182 
1183    Input Parameters:
1184 +  A - the matrix
1185 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1186    to control all matrix memory allocation.
1187 
1188    Notes:
1189    The dense format is fully compatible with standard Fortran 77
1190    storage by columns.
1191 
1192    The data input variable is intended primarily for Fortran programmers
1193    who wish to allocate their own matrix memory space.  Most users should
1194    set data=PETSC_NULL.
1195 
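   Example usage (a minimal sketch; m is the local row count and N the global column
   count, both assumed to be known, and error checking is omitted):
.vb
      /* let PETSc allocate the local storage */
      MatMPIDenseSetPreallocation(A,PETSC_NULL);

      /* or supply a user-allocated array of m*N entries stored by columns */
      PetscScalar *data;
      PetscMalloc(m*N*sizeof(PetscScalar),&data);
      MatMPIDenseSetPreallocation(A,data);
.ve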
1196    Level: intermediate
1197 
1198 .keywords: matrix,dense, parallel
1199 
1200 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1201 @*/
1202 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1203 {
1204   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1205 
1206   PetscFunctionBegin;
1207   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1208   if (f) {
1209     ierr = (*f)(mat,data);CHKERRQ(ierr);
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatCreateMPIDense"
1216 /*@C
1217    MatCreateMPIDense - Creates a parallel matrix in dense format.
1218 
1219    Collective on MPI_Comm
1220 
1221    Input Parameters:
1222 +  comm - MPI communicator
1223 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1224 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1225 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1226 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1227 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1228    to control all matrix memory allocation.
1229 
1230    Output Parameter:
1231 .  A - the matrix
1232 
1233    Notes:
1234    The dense format is fully compatible with standard Fortran 77
1235    storage by columns.
1236 
1237    The data input variable is intended primarily for Fortran programmers
1238    who wish to allocate their own matrix memory space.  Most users should
1239    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1240 
1241    The user MUST specify either the local or global matrix dimensions
1242    (possibly both).
1243 
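   Example usage (a minimal sketch; M and N are assumed to be set by the caller and
   error checking is omitted):
.vb
      Mat A;
      /* global size M x N; let PETSc choose the local sizes and allocate the storage */
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
.ve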
1244    Level: intermediate
1245 
1246 .keywords: matrix,dense, parallel
1247 
1248 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1249 @*/
1250 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1251 {
1252   PetscErrorCode ierr;
1253   PetscMPIInt    size;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1257   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1258   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1259   if (size > 1) {
1260     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1261     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1262   } else {
1263     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1264     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1265   }
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDuplicate_MPIDense"
1271 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1272 {
1273   Mat            mat;
1274   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278   *newmat       = 0;
1279   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1280   ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
1281   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1282   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1283   mat->data         = (void*)a;
1284   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1285   mat->factor       = A->factor;
1286   mat->assembled    = PETSC_TRUE;
1287   mat->preallocated = PETSC_TRUE;
1288 
1289   a->rstart       = oldmat->rstart;
1290   a->rend         = oldmat->rend;
1291   a->size         = oldmat->size;
1292   a->rank         = oldmat->rank;
1293   mat->insertmode = NOT_SET_VALUES;
1294   a->nvec         = oldmat->nvec;
1295   a->donotstash   = oldmat->donotstash;
1296   ierr            = PetscMalloc((a->size+1)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1297   ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1298   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1299   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1300 
1301   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1302   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1303   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1304   *newmat = mat;
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #include "petscsys.h"
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1312 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1313 {
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size;
1316   PetscInt       *rowners,i,m,nz,j;
1317   PetscScalar    *array,*vals,*vals_ptr;
1318   MPI_Status     status;
1319 
1320   PetscFunctionBegin;
1321   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1322   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1323 
1324   /* determine ownership of all rows */
1325   m          = M/size + ((M % size) > rank);
1326   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1327   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1328   rowners[0] = 0;
1329   for (i=2; i<=size; i++) {
1330     rowners[i] += rowners[i-1];
1331   }
1332 
1333   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1334   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1335   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1336   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1337   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1338 
1339   if (!rank) {
1340     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1341 
1342     /* read in my part of the matrix numerical values  */
1343     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1344 
1345     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1346     vals_ptr = vals;
1347     for (i=0; i<m; i++) {
1348       for (j=0; j<N; j++) {
1349         array[i + j*m] = *vals_ptr++;
1350       }
1351     }
1352 
1353     /* read in the other processors' parts and ship them out */
1354     for (i=1; i<size; i++) {
1355       nz   = (rowners[i+1] - rowners[i])*N;
1356       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1357       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1358     }
1359   } else {
1360     /* receive numeric values */
1361     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1362 
1363     /* receive message of values*/
1364     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1365 
1366     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1367     vals_ptr = vals;
1368     for (i=0; i<m; i++) {
1369       for (j=0; j<N; j++) {
1370         array[i + j*m] = *vals_ptr++;
1371       }
1372     }
1373   }
1374   ierr = PetscFree(rowners);CHKERRQ(ierr);
1375   ierr = PetscFree(vals);CHKERRQ(ierr);
1376   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatLoad_MPIDense"
1383 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1384 {
1385   Mat            A;
1386   PetscScalar    *vals,*svals;
1387   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1388   MPI_Status     status;
1389   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1390   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1391   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1392   PetscInt       i,nz,j,rstart,rend;
1393   int            fd;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1398   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1399   if (!rank) {
1400     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1401     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1402     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
1403   }
1404 
1405   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1406   M = header[1]; N = header[2]; nz = header[3];
1407 
1408   /*
1409        Handle case where matrix is stored on disk as a dense matrix
1410   */
1411   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1412     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1413     PetscFunctionReturn(0);
1414   }
1415 
1416   /* determine ownership of all rows */
1417   m          = M/size + ((M % size) > rank);
1418   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1419   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1420   rowners[0] = 0;
1421   for (i=2; i<=size; i++) {
1422     rowners[i] += rowners[i-1];
1423   }
1424   rstart = rowners[rank];
1425   rend   = rowners[rank+1];
1426 
1427   /* distribute row lengths to all processors */
1428   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1429   offlens = ourlens + (rend-rstart);
1430   if (!rank) {
1431     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1432     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1433     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1434     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1435     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1436     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1437   } else {
1438     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1439   }
1440 
1441   if (!rank) {
1442     /* calculate the number of nonzeros on each processor */
1443     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1444     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1445     for (i=0; i<size; i++) {
1446       for (j=rowners[i]; j< rowners[i+1]; j++) {
1447         procsnz[i] += rowlengths[j];
1448       }
1449     }
1450     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1451 
1452     /* determine max buffer needed and allocate it */
1453     maxnz = 0;
1454     for (i=0; i<size; i++) {
1455       maxnz = PetscMax(maxnz,procsnz[i]);
1456     }
1457     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1458 
1459     /* read in my part of the matrix column indices  */
1460     nz = procsnz[0];
1461     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1462     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1463 
1464     /* read in everyone else's part and ship it off */
1465     for (i=1; i<size; i++) {
1466       nz   = procsnz[i];
1467       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1468       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1469     }
1470     ierr = PetscFree(cols);CHKERRQ(ierr);
1471   } else {
1472     /* determine buffer space needed for message */
1473     nz = 0;
1474     for (i=0; i<m; i++) {
1475       nz += ourlens[i];
1476     }
1477     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1478 
1479     /* receive message of column indices*/
1480     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1481     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1482     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1483   }
1484 
1485   /* loop over local rows, determining number of off diagonal entries */
1486   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1487   jj = 0;
1488   for (i=0; i<m; i++) {
1489     for (j=0; j<ourlens[i]; j++) {
1490       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1491       jj++;
1492     }
1493   }
1494 
1495   /* create our matrix */
1496   for (i=0; i<m; i++) {
1497     ourlens[i] -= offlens[i];
1498   }
1499   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1500   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1501   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1502   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1503   A = *newmat;
1504   for (i=0; i<m; i++) {
1505     ourlens[i] += offlens[i];
1506   }
1507 
1508   if (!rank) {
1509     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1510 
1511     /* read in my part of the matrix numerical values  */
1512     nz = procsnz[0];
1513     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1514 
1515     /* insert into matrix */
1516     jj      = rstart;
1517     smycols = mycols;
1518     svals   = vals;
1519     for (i=0; i<m; i++) {
1520       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1521       smycols += ourlens[i];
1522       svals   += ourlens[i];
1523       jj++;
1524     }
1525 
1526     /* read in the other processors' parts and ship them out */
1527     for (i=1; i<size; i++) {
1528       nz   = procsnz[i];
1529       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1530       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1531     }
1532     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1533   } else {
1534     /* receive numeric values */
1535     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1536 
1537     /* receive message of values*/
1538     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1539     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1540     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1541 
1542     /* insert into matrix */
1543     jj      = rstart;
1544     smycols = mycols;
1545     svals   = vals;
1546     for (i=0; i<m; i++) {
1547       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1548       smycols += ourlens[i];
1549       svals   += ourlens[i];
1550       jj++;
1551     }
1552   }
1553   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1554   ierr = PetscFree(vals);CHKERRQ(ierr);
1555   ierr = PetscFree(mycols);CHKERRQ(ierr);
1556   ierr = PetscFree(rowners);CHKERRQ(ierr);
1557 
1558   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1559   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 
1564 
1565 
1566 
1567