xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2cfcea2917e9827eeaf4fa9c2e8dd75b054ced7b)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
36               matrix that stores the local entries of the operator. For a sequential matrix it returns the matrix itself.
37 
38     Input Parameter:
39 .      A - the Seq or MPI dense matrix
40 
41     Output Parameter:
42 .      B - the inner matrix
43 
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56   if (!flg) {  /* this type check is fragile and should be improved */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
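/*
   A minimal usage sketch (kept inside a comment so it is not compiled): obtain the
   per-process MATSEQDENSE block of an assembled dense matrix and access its
   column-major storage.  The matrix A is a placeholder for any existing
   MATDENSE/MATMPIDENSE matrix.

     Mat            A,Aloc;
     PetscScalar    *v;
     PetscErrorCode ierr;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
     ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
*/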
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
97 
98 EXTERN_C_BEGIN
99 #undef __FUNCT__
100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102 {
103   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
106   PetscScalar    *array;
107   MPI_Comm       comm;
108 
109   PetscFunctionBegin;
110   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
111 
112   /* The reuse aspect is not implemented efficiently */
113   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
114 
115   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
116   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
117   ierr = MatCreate(comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
119   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
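  /* mdn->A stores the local rows against all global columns in column-major order with
     leading dimension m, so the m-by-m diagonal block begins at column rstart, i.e. at
     offset m*rstart into the array */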
120   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
121   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
122   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 
125   *iscopy = PETSC_TRUE;
126   PetscFunctionReturn(0);
127 }
128 EXTERN_C_END
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetValues_MPIDense"
132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133 {
134   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
135   PetscErrorCode ierr;
136   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137   PetscTruth     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue;
142     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
147       } else {
148         for (j=0; j<n; j++) {
149           if (idxn[j] < 0) continue;
150           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
152         }
153       }
154     } else {
155       if (!A->donotstash) {
156         if (roworiented) {
157           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
158         } else {
159           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
160         }
161       }
162     }
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
178     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
183         if (idxn[j] >= mat->cmap.N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210 {
211   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
213   PetscErrorCode ierr;
214   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215   PetscScalar    *av,*bv,*v = lmat->v;
216   Mat            newmat;
217 
218   PetscFunctionBegin;
219   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
220   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
221   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
222   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
223 
224   /* No parallel redistribution is currently supported! We should really check each index set
225      to confirm that it is OK.  ... Currently only submatrices with the same row partitioning as the
226      original matrix are supported! */
227 
228   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
230 
231   /* Check submatrix call */
232   if (scall == MAT_REUSE_MATRIX) {
233     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234     /* Really need to test rows and column sizes! */
235     newmat = *B;
236   } else {
237     /* Create and fill new matrix */
238     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
239     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
240     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
241     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
242   }
243 
244   /* Now extract the data pointers and do the copy, column at a time */
245   newmatd = (Mat_MPIDense*)newmat->data;
246   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247 
248   for (i=0; i<ncols; i++) {
249     av = v + nlrows*icol[i];
250     for (j=0; j<nrows; j++) {
251       *bv++ = av[irow[j] - rstart];
252     }
253   }
254 
255   /* Assemble the matrices so that the correct flags are set */
256   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258 
259   /* Free work space */
260   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
261   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
262   *B = newmat;
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279   MPI_Comm       comm = mat->comm;
280   PetscErrorCode ierr;
281   PetscInt       nstash,reallocs;
282   InsertMode     addv;
283 
284   PetscFunctionBegin;
285   /* make sure all processors are either in INSERTMODE or ADDMODE */
286   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
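  /* INSERT_VALUES and ADD_VALUES occupy distinct bits, so the bitwise OR over all ranks
     equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted while others added */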
287   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289   }
290   mat->insertmode = addv; /* in case this processor had no cache */
291 
292   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303   PetscErrorCode  ierr;
304   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305   PetscMPIInt     n;
306   PetscScalar     *val;
307   InsertMode      addv=mat->insertmode;
308 
309   PetscFunctionBegin;
310   /*  wait on receives */
311   while (1) {
312     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
313     if (!flg) break;
314 
315     for (i=0; i<n;) {
316       /* Now identify the consecutive vals belonging to the same row */
317       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318       if (j < n) ncols = j-i;
319       else       ncols = n-i;
320       /* Now assemble all these values with a single function call */
321       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
322       i = j;
323     }
324   }
325   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326 
327   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* The code does not handle the diagonal entries correctly unless the
349    matrix is square and the column and row ownerships are identical.
350    This is a BUG. The only way to fix it seems to be to access
351    the local matrix (mdn->A) directly rather than going through the
352    MatZeroRows() routine.
353 */
354 #undef __FUNCT__
355 #define __FUNCT__ "MatZeroRows_MPIDense"
356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357 {
358   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
359   PetscErrorCode ierr;
360   PetscInt       i,*owners = A->rmap.range;
361   PetscInt       *nprocs,j,idx,nsends;
362   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
364   PetscInt       *lens,*lrows,*values;
365   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366   MPI_Comm       comm = A->comm;
367   MPI_Request    *send_waits,*recv_waits;
368   MPI_Status     recv_status,*send_status;
369   PetscTruth     found;
370 
371   PetscFunctionBegin;
372   /*  first count number of contributors to each processor */
373   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
374   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
376   for (i=0; i<N; i++) {
377     idx = rows[i];
378     found = PETSC_FALSE;
379     for (j=0; j<size; j++) {
380       if (idx >= owners[j] && idx < owners[j+1]) {
381         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382       }
383     }
384     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385   }
386   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
387 
388   /* inform other processors of number of messages and max length*/
389   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
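  /* nmax bounds the length of the longest message this rank will receive and nrecvs is
     the number of messages it will receive, derived from the count/flag pairs in nprocs */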
390 
391   /* post receives:   */
392   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
394   for (i=0; i<nrecvs; i++) {
395     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
396   }
397 
398   /* do sends:
399       1) starts[i] gives the starting index in svalues for stuff going to
400          the ith processor
401   */
402   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
403   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
404   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
405   starts[0]  = 0;
406   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   for (i=0; i<N; i++) {
408     svalues[starts[owner[i]]++] = rows[i];
409   }
410 
411   starts[0] = 0;
412   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413   count = 0;
414   for (i=0; i<size; i++) {
415     if (nprocs[2*i+1]) {
416       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
417     }
418   }
419   ierr = PetscFree(starts);CHKERRQ(ierr);
420 
421   base = owners[rank];
422 
423   /*  wait on receives */
424   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
425   source = lens + nrecvs;
426   count  = nrecvs; slen = 0;
427   while (count) {
428     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
429     /* unpack receives into our local space */
430     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
431     source[imdex]  = recv_status.MPI_SOURCE;
432     lens[imdex]    = n;
433     slen += n;
434     count--;
435   }
436   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
437 
438   /* move the data into the send scatter */
439   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
440   count = 0;
441   for (i=0; i<nrecvs; i++) {
442     values = rvalues + i*nmax;
443     for (j=0; j<lens[i]; j++) {
444       lrows[count++] = values[j] - base;
445     }
446   }
447   ierr = PetscFree(rvalues);CHKERRQ(ierr);
448   ierr = PetscFree(lens);CHKERRQ(ierr);
449   ierr = PetscFree(owner);CHKERRQ(ierr);
450   ierr = PetscFree(nprocs);CHKERRQ(ierr);
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
477   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
491   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap.n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
543   radd = A->rmap.rstart*m;
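  /* the local block is column-major with leading dimension m, so diagonal entry i
     (global column rstart+i) sits at offset rstart*m + i*m + i = radd + i*m + i */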
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 
558   PetscFunctionBegin;
559 
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
565   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
566   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
567   if (mdn->factor) {
568     ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
569     ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
570     ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
571     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(mdn);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatView_MPIDense_Binary"
581 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
582 {
583   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
584   PetscErrorCode ierr;
585 
586   PetscFunctionBegin;
587   if (mdn->size == 1) {
588     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
589   }
590   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
596 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
597 {
598   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
599   PetscErrorCode    ierr;
600   PetscMPIInt       size = mdn->size,rank = mdn->rank;
601   PetscViewerType   vtype;
602   PetscTruth        iascii,isdraw;
603   PetscViewer       sviewer;
604   PetscViewerFormat format;
605 
606   PetscFunctionBegin;
607   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
608   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
609   if (iascii) {
610     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
611     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
612     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
613       MatInfo info;
614       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
615       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
616                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
617       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
618       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
619       PetscFunctionReturn(0);
620     } else if (format == PETSC_VIEWER_ASCII_INFO) {
621       PetscFunctionReturn(0);
622     }
623   } else if (isdraw) {
624     PetscDraw  draw;
625     PetscTruth isnull;
626 
627     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
628     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
629     if (isnull) PetscFunctionReturn(0);
630   }
631 
632   if (size == 1) {
633     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
634   } else {
635     /* assemble the entire matrix onto first processor. */
636     Mat         A;
637     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
638     PetscInt    *cols;
639     PetscScalar *vals;
640 
641     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
642     if (!rank) {
643       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
644     } else {
645       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
646     }
647     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
648     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
649     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
650     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
651 
652     /* Copy the matrix ... This isn't the most efficient means,
653        but it's quick for now */
654     A->insertmode = INSERT_VALUES;
655     row = mat->rmap.rstart; m = mdn->A->rmap.n;
656     for (i=0; i<m; i++) {
657       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
658       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
659       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
660       row++;
661     }
662 
663     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
664     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
665     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
666     if (!rank) {
667       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
668     }
669     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
670     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
671     ierr = MatDestroy(A);CHKERRQ(ierr);
672   }
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatView_MPIDense"
678 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
679 {
680   PetscErrorCode ierr;
681   PetscTruth     iascii,isbinary,isdraw,issocket;
682 
683   PetscFunctionBegin;
684 
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
687   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
688   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
689 
690   if (iascii || issocket || isdraw) {
691     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
692   } else if (isbinary) {
693     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
694   } else {
695     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatGetInfo_MPIDense"
702 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
703 {
704   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
705   Mat            mdn = mat->A;
706   PetscErrorCode ierr;
707   PetscReal      isend[5],irecv[5];
708 
709   PetscFunctionBegin;
710   info->rows_global    = (double)A->rmap.N;
711   info->columns_global = (double)A->cmap.N;
712   info->rows_local     = (double)A->rmap.n;
713   info->columns_local  = (double)A->cmap.N;
714   info->block_size     = 1.0;
715   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
716   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
717   isend[3] = info->memory;  isend[4] = info->mallocs;
718   if (flag == MAT_LOCAL) {
719     info->nz_used      = isend[0];
720     info->nz_allocated = isend[1];
721     info->nz_unneeded  = isend[2];
722     info->memory       = isend[3];
723     info->mallocs      = isend[4];
724   } else if (flag == MAT_GLOBAL_MAX) {
725     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
726     info->nz_used      = irecv[0];
727     info->nz_allocated = irecv[1];
728     info->nz_unneeded  = irecv[2];
729     info->memory       = irecv[3];
730     info->mallocs      = irecv[4];
731   } else if (flag == MAT_GLOBAL_SUM) {
732     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
733     info->nz_used      = irecv[0];
734     info->nz_allocated = irecv[1];
735     info->nz_unneeded  = irecv[2];
736     info->memory       = irecv[3];
737     info->mallocs      = irecv[4];
738   }
739   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
740   info->fill_ratio_needed = 0;
741   info->factor_mallocs    = 0;
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatSetOption_MPIDense"
747 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
748 {
749   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   switch (op) {
754   case MAT_NO_NEW_NONZERO_LOCATIONS:
755   case MAT_YES_NEW_NONZERO_LOCATIONS:
756   case MAT_NEW_NONZERO_LOCATION_ERR:
757   case MAT_NEW_NONZERO_ALLOCATION_ERR:
758   case MAT_COLUMNS_SORTED:
759   case MAT_COLUMNS_UNSORTED:
760     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
761     break;
762   case MAT_ROW_ORIENTED:
763     a->roworiented = PETSC_TRUE;
764     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
765     break;
766   case MAT_ROWS_SORTED:
767   case MAT_ROWS_UNSORTED:
768   case MAT_YES_NEW_DIAGONALS:
769   case MAT_USE_HASH_TABLE:
770     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
771     break;
772   case MAT_COLUMN_ORIENTED:
773     a->roworiented = PETSC_FALSE;
774     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
775     break;
776   case MAT_IGNORE_OFF_PROC_ENTRIES:
777     a->donotstash = PETSC_TRUE;
778     break;
779   case MAT_NO_NEW_DIAGONALS:
780     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
781   case MAT_SYMMETRIC:
782   case MAT_STRUCTURALLY_SYMMETRIC:
783   case MAT_NOT_SYMMETRIC:
784   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
785   case MAT_HERMITIAN:
786   case MAT_NOT_HERMITIAN:
787   case MAT_SYMMETRY_ETERNAL:
788   case MAT_NOT_SYMMETRY_ETERNAL:
789     break;
790   default:
791     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
792   }
793   PetscFunctionReturn(0);
794 }
795 
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatDiagonalScale_MPIDense"
799 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
800 {
801   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
802   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
803   PetscScalar    *l,*r,x,*v;
804   PetscErrorCode ierr;
805   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
806 
807   PetscFunctionBegin;
808   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
809   if (ll) {
810     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
811     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
812     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
813     for (i=0; i<m; i++) {
814       x = l[i];
815       v = mat->v + i;
816       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
817     }
818     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
819     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
820   }
821   if (rr) {
822     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
823     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
824     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
825     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
826     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
827     for (i=0; i<n; i++) {
828       x = r[i];
829       v = mat->v + i*m;
830       for (j=0; j<m; j++) { (*v++) *= x;}
831     }
832     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
833     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
834   }
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatNorm_MPIDense"
840 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
841 {
842   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
843   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
844   PetscErrorCode ierr;
845   PetscInt       i,j;
846   PetscReal      sum = 0.0;
847   PetscScalar    *v = mat->v;
848 
849   PetscFunctionBegin;
850   if (mdn->size == 1) {
851     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
852   } else {
853     if (type == NORM_FROBENIUS) {
854       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
855 #if defined(PETSC_USE_COMPLEX)
856         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
857 #else
858         sum += (*v)*(*v); v++;
859 #endif
860       }
861       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
862       *nrm = sqrt(*nrm);
863       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
864     } else if (type == NORM_1) {
865       PetscReal *tmp,*tmp2;
866       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
867       tmp2 = tmp + A->cmap.N;
868       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
869       *nrm = 0.0;
870       v = mat->v;
871       for (j=0; j<mdn->A->cmap.n; j++) {
872         for (i=0; i<mdn->A->rmap.n; i++) {
873           tmp[j] += PetscAbsScalar(*v);  v++;
874         }
875       }
876       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
877       for (j=0; j<A->cmap.N; j++) {
878         if (tmp2[j] > *nrm) *nrm = tmp2[j];
879       }
880       ierr = PetscFree(tmp);CHKERRQ(ierr);
881       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
882     } else if (type == NORM_INFINITY) { /* max row norm */
883       PetscReal ntemp;
884       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
885       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
886     } else {
887       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
888     }
889   }
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatTranspose_MPIDense"
895 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
896 {
897   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
898   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
899   Mat            B;
900   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
901   PetscErrorCode ierr;
902   PetscInt       j,i;
903   PetscScalar    *v;
904 
905   PetscFunctionBegin;
906   if (!matout && M != N) {
907     SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
908   }
909   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
910   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
911   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
912   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
913 
914   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
915   ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
916   for (j=0; j<n; j++) {
917     for (i=0; i<m; i++) rwork[i] = rstart + i;
918     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
919     v   += m;
920   }
921   ierr = PetscFree(rwork);CHKERRQ(ierr);
922   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
923   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
924   if (matout) {
925     *matout = B;
926   } else {
927     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 #include "petscblaslapack.h"
933 #undef __FUNCT__
934 #define __FUNCT__ "MatScale_MPIDense"
935 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
936 {
937   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
938   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
939   PetscScalar    oalpha = alpha;
940   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
941   PetscErrorCode ierr;
942 
943   PetscFunctionBegin;
944   BLASscal_(&nz,&oalpha,a->v,&one);
945   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
953 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
954 {
955   PetscErrorCode ierr;
956 
957   PetscFunctionBegin;
958   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /* -------------------------------------------------------------------*/
963 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
964        MatGetRow_MPIDense,
965        MatRestoreRow_MPIDense,
966        MatMult_MPIDense,
967 /* 4*/ MatMultAdd_MPIDense,
968        MatMultTranspose_MPIDense,
969        MatMultTransposeAdd_MPIDense,
970        0,
971        0,
972        0,
973 /*10*/ 0,
974        0,
975        0,
976        0,
977        MatTranspose_MPIDense,
978 /*15*/ MatGetInfo_MPIDense,
979        0,
980        MatGetDiagonal_MPIDense,
981        MatDiagonalScale_MPIDense,
982        MatNorm_MPIDense,
983 /*20*/ MatAssemblyBegin_MPIDense,
984        MatAssemblyEnd_MPIDense,
985        0,
986        MatSetOption_MPIDense,
987        MatZeroEntries_MPIDense,
988 /*25*/ MatZeroRows_MPIDense,
989        MatLUFactorSymbolic_MPIDense,
990        0,
991        MatCholeskyFactorSymbolic_MPIDense,
992        0,
993 /*30*/ MatSetUpPreallocation_MPIDense,
994        0,
995        0,
996        MatGetArray_MPIDense,
997        MatRestoreArray_MPIDense,
998 /*35*/ MatDuplicate_MPIDense,
999        0,
1000        0,
1001        0,
1002        0,
1003 /*40*/ 0,
1004        MatGetSubMatrices_MPIDense,
1005        0,
1006        MatGetValues_MPIDense,
1007        0,
1008 /*45*/ 0,
1009        MatScale_MPIDense,
1010        0,
1011        0,
1012        0,
1013 /*50*/ 0,
1014        0,
1015        0,
1016        0,
1017        0,
1018 /*55*/ 0,
1019        0,
1020        0,
1021        0,
1022        0,
1023 /*60*/ MatGetSubMatrix_MPIDense,
1024        MatDestroy_MPIDense,
1025        MatView_MPIDense,
1026        0,
1027        0,
1028 /*65*/ 0,
1029        0,
1030        0,
1031        0,
1032        0,
1033 /*70*/ 0,
1034        0,
1035        0,
1036        0,
1037        0,
1038 /*75*/ 0,
1039        0,
1040        0,
1041        0,
1042        0,
1043 /*80*/ 0,
1044        0,
1045        0,
1046        0,
1047 /*84*/ MatLoad_MPIDense,
1048        0,
1049        0,
1050        0,
1051        0,
1052        0,
1053 /*90*/ 0,
1054        0,
1055        0,
1056        0,
1057        0,
1058 /*95*/ 0,
1059        0,
1060        0,
1061        0};
1062 
1063 EXTERN_C_BEGIN
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1066 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1067 {
1068   Mat_MPIDense   *a;
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   mat->preallocated = PETSC_TRUE;
1073   /* Note:  For now, when data is specified above, this assumes the user correctly
1074    allocates the local dense storage space.  We should add error checking. */
1075 
1076   a    = (Mat_MPIDense*)mat->data;
1077   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1078   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1079   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1080   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1081   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 EXTERN_C_END
1085 
1086 /*MC
1087    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1088 
1089    Options Database Keys:
1090 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1091 
1092   Level: beginner
1093 
1094 .seealso: MatCreateMPIDense
1095 M*/
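/*
   A minimal creation sketch (kept inside a comment so it is not compiled): building a
   distributed dense matrix of this type explicitly.  The sizes m, n, M, N are
   placeholders chosen by the caller.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/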
1096 
1097 EXTERN_C_BEGIN
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatCreate_MPIDense"
1100 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1101 {
1102   Mat_MPIDense   *a;
1103   PetscErrorCode ierr;
1104 
1105   PetscFunctionBegin;
1106   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1107   mat->data         = (void*)a;
1108   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1109   mat->factor       = 0;
1110   mat->mapping      = 0;
1111 
1112   a->factor       = 0;
1113   mat->insertmode = NOT_SET_VALUES;
1114   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1115   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1116 
1117   mat->rmap.bs = mat->cmap.bs = 1;
1118   ierr = PetscMapInitialize(mat->comm,&mat->rmap);CHKERRQ(ierr);
1119   ierr = PetscMapInitialize(mat->comm,&mat->cmap);CHKERRQ(ierr);
1120   a->nvec = mat->cmap.n;
1121 
1122   /* build cache for off array entries formed */
1123   a->donotstash = PETSC_FALSE;
1124   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1125 
1126   /* stuff used for matrix vector multiply */
1127   a->lvec        = 0;
1128   a->Mvctx       = 0;
1129   a->roworiented = PETSC_TRUE;
1130 
1131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1132                                      "MatGetDiagonalBlock_MPIDense",
1133                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1135                                      "MatMPIDenseSetPreallocation_MPIDense",
1136                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 EXTERN_C_END
1140 
1141 /*MC
1142    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1143 
1144    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1145    and MATMPIDENSE otherwise.
1146 
1147    Options Database Keys:
1148 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1149 
1150   Level: beginner
1151 
1152 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1153 M*/
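/*
   A minimal sketch (kept inside a comment so it is not compiled): with MATDENSE the
   communicator size picks the concrete type, and -mat_type dense can still be given
   through the options database.  M and N are placeholder global sizes.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/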
1154 
1155 EXTERN_C_BEGIN
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatCreate_Dense"
1158 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1159 {
1160   PetscErrorCode ierr;
1161   PetscMPIInt    size;
1162 
1163   PetscFunctionBegin;
1164   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1166   if (size == 1) {
1167     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1168   } else {
1169     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 EXTERN_C_END
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1177 /*@C
1178    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1179 
1180    Not collective
1181 
1182    Input Parameters:
1183 +  A - the matrix
1184 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1185    to control all matrix memory allocation.
1186 
1187    Notes:
1188    The dense format is fully compatible with standard Fortran 77
1189    storage by columns.
1190 
1191    The data input variable is intended primarily for Fortran programmers
1192    who wish to allocate their own matrix memory space.  Most users should
1193    set data=PETSC_NULL.
1194 
1195    Level: intermediate
1196 
1197 .keywords: matrix,dense, parallel
1198 
1199 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1200 @*/
1201 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1202 {
1203   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1204 
1205   PetscFunctionBegin;
1206   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1207   if (f) {
1208     ierr = (*f)(mat,data);CHKERRQ(ierr);
1209   }
1210   PetscFunctionReturn(0);
1211 }
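/*
   A minimal sketch (kept inside a comment so it is not compiled): supplying
   user-allocated storage.  As the routine above shows, the array must hold
   (local rows) x (global columns) entries in column-major order; m and N are
   placeholders for those two sizes.

     Mat            A;
     PetscScalar    *data;
     PetscErrorCode ierr;

     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/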
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatCreateMPIDense"
1215 /*@C
1216    MatCreateMPIDense - Creates a parallel matrix in dense format.
1217 
1218    Collective on MPI_Comm
1219 
1220    Input Parameters:
1221 +  comm - MPI communicator
1222 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1223 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1224 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1225 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1226 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1227    to control all matrix memory allocation.
1228 
1229    Output Parameter:
1230 .  A - the matrix
1231 
1232    Notes:
1233    The dense format is fully compatible with standard Fortran 77
1234    storage by columns.
1235 
1236    The data input variable is intended primarily for Fortran programmers
1237    who wish to allocate their own matrix memory space.  Most users should
1238    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1239 
1240    The user MUST specify either the local or global matrix dimensions
1241    (possibly both).
1242 
1243    Level: intermediate
1244 
1245 .keywords: matrix,dense, parallel
1246 
1247 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1248 @*/
1249 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1250 {
1251   PetscErrorCode ierr;
1252   PetscMPIInt    size;
1253 
1254   PetscFunctionBegin;
1255   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1256   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1257   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1258   if (size > 1) {
1259     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1260     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1261   } else {
1262     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1263     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1264   }
1265   PetscFunctionReturn(0);
1266 }
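/*
   A minimal sketch (kept inside a comment so it is not compiled): the convenience
   constructor followed by the usual set/assemble cycle.  M, N, i, ncols, cols and
   vals are placeholders supplied by the caller.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/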
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDuplicate_MPIDense"
1270 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1271 {
1272   Mat            mat;
1273   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277   *newmat       = 0;
1278   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1279   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1280   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1281   a                 = (Mat_MPIDense*)mat->data;
1282   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1283   mat->factor       = A->factor;
1284   mat->assembled    = PETSC_TRUE;
1285   mat->preallocated = PETSC_TRUE;
1286 
1287   mat->rmap.rstart     = A->rmap.rstart;
1288   mat->rmap.rend       = A->rmap.rend;
1289   a->size              = oldmat->size;
1290   a->rank              = oldmat->rank;
1291   mat->insertmode      = NOT_SET_VALUES;
1292   a->nvec              = oldmat->nvec;
1293   a->donotstash        = oldmat->donotstash;
1294 
1295   ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->rmap.range);CHKERRQ(ierr);
1296   ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->cmap.range);CHKERRQ(ierr);
1297   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1298   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1299   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1300 
1301   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1302   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1303   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1304   *newmat = mat;
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #include "petscsys.h"
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1312 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1313 {
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size;
1316   PetscInt       *rowners,i,m,nz,j;
1317   PetscScalar    *array,*vals,*vals_ptr;
1318   MPI_Status     status;
1319 
1320   PetscFunctionBegin;
1321   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1322   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1323 
1324   /* determine ownership of all rows */
1325   m          = M/size + ((M % size) > rank);
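  /* each rank gets M/size rows, with the first M%size ranks taking one extra row */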
1326   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1327   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1328   rowners[0] = 0;
1329   for (i=2; i<=size; i++) {
1330     rowners[i] += rowners[i-1];
1331   }
1332 
1333   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1334   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1335   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1336   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1337   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1338 
1339   if (!rank) {
1340     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1341 
1342     /* read in my part of the matrix numerical values  */
1343     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1344 
1345     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1346     vals_ptr = vals;
1347     for (i=0; i<m; i++) {
1348       for (j=0; j<N; j++) {
1349         array[i + j*m] = *vals_ptr++;
1350       }
1351     }
1352 
1353     /* read in other processors and ship out */
1354     for (i=1; i<size; i++) {
1355       nz   = (rowners[i+1] - rowners[i])*N;
1356       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1357       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1358     }
1359   } else {
1360     /* receive numeric values */
1361     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1362 
1363     /* receive message of values*/
1364     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1365 
1366     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1367     vals_ptr = vals;
1368     for (i=0; i<m; i++) {
1369       for (j=0; j<N; j++) {
1370         array[i + j*m] = *vals_ptr++;
1371       }
1372     }
1373   }
1374   ierr = PetscFree(rowners);CHKERRQ(ierr);
1375   ierr = PetscFree(vals);CHKERRQ(ierr);
1376   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatLoad_MPIDense"
1383 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1384 {
1385   Mat            A;
1386   PetscScalar    *vals,*svals;
1387   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1388   MPI_Status     status;
1389   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1390   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1391   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1392   PetscInt       i,nz,j,rstart,rend;
1393   int            fd;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1398   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1399   if (!rank) {
1400     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1401     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1402     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1403   }
1404 
1405   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1406   M = header[1]; N = header[2]; nz = header[3];
1407 
1408   /*
1409        Handle case where matrix is stored on disk as a dense matrix
1410   */
1411   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1412     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1413     PetscFunctionReturn(0);
1414   }
1415 
1416   /* determine ownership of all rows */
1417   m          = M/size + ((M % size) > rank);
1418   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1419   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1420   rowners[0] = 0;
1421   for (i=2; i<=size; i++) {
1422     rowners[i] += rowners[i-1];
1423   }
1424   rstart = rowners[rank];
1425   rend   = rowners[rank+1];
1426 
1427   /* distribute row lengths to all processors */
1428   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1429   offlens = ourlens + (rend-rstart);
1430   if (!rank) {
1431     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1432     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1433     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1434     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1435     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1436     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1437   } else {
1438     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1439   }
1440 
1441   if (!rank) {
1442     /* calculate the number of nonzeros on each processor */
1443     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1444     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1445     for (i=0; i<size; i++) {
1446       for (j=rowners[i]; j< rowners[i+1]; j++) {
1447         procsnz[i] += rowlengths[j];
1448       }
1449     }
1450     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1451 
1452     /* determine max buffer needed and allocate it */
1453     maxnz = 0;
1454     for (i=0; i<size; i++) {
1455       maxnz = PetscMax(maxnz,procsnz[i]);
1456     }
1457     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1458 
1459     /* read in my part of the matrix column indices  */
1460     nz = procsnz[0];
1461     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1462     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1463 
1464     /* read in everyone else's part and ship it off */
1465     for (i=1; i<size; i++) {
1466       nz   = procsnz[i];
1467       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1468       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1469     }
1470     ierr = PetscFree(cols);CHKERRQ(ierr);
1471   } else {
1472     /* determine buffer space needed for message */
1473     nz = 0;
1474     for (i=0; i<m; i++) {
1475       nz += ourlens[i];
1476     }
1477     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1478 
1479     /* receive message of column indices*/
1480     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1481     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1482     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1483   }
1484 
1485   /* loop over local rows, determining number of off diagonal entries */
1486   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1487   jj = 0;
1488   for (i=0; i<m; i++) {
1489     for (j=0; j<ourlens[i]; j++) {
1490       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1491       jj++;
1492     }
1493   }
1494 
1495   /* create our matrix */
1496   for (i=0; i<m; i++) {
1497     ourlens[i] -= offlens[i];
1498   }
1499   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1500   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1501   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1502   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1503   A = *newmat;
1504   for (i=0; i<m; i++) {
1505     ourlens[i] += offlens[i];
1506   }
1507 
1508   if (!rank) {
1509     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1510 
1511     /* read in my part of the matrix numerical values  */
1512     nz = procsnz[0];
1513     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1514 
1515     /* insert into matrix */
1516     jj      = rstart;
1517     smycols = mycols;
1518     svals   = vals;
1519     for (i=0; i<m; i++) {
1520       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1521       smycols += ourlens[i];
1522       svals   += ourlens[i];
1523       jj++;
1524     }
1525 
1526     /* read in other processors and ship out */
1527     for (i=1; i<size; i++) {
1528       nz   = procsnz[i];
1529       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1530       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1531     }
1532     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1533   } else {
1534     /* receive numeric values */
1535     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1536 
1537     /* receive message of values*/
1538     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1539     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1540     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1541 
1542     /* insert into matrix */
1543     jj      = rstart;
1544     smycols = mycols;
1545     svals   = vals;
1546     for (i=0; i<m; i++) {
1547       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1548       smycols += ourlens[i];
1549       svals   += ourlens[i];
1550       jj++;
1551     }
1552   }
1553   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1554   ierr = PetscFree(vals);CHKERRQ(ierr);
1555   ierr = PetscFree(mycols);CHKERRQ(ierr);
1556   ierr = PetscFree(rowners);CHKERRQ(ierr);
1557 
1558   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1559   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 
1564 
1565 
1566 
1567