/* xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f259bd47db2537c0c1948ca59840a8a8f5c67e64) */
#define PETSCMAT_DLL

/*
   Basic functions for parallel dense matrices.
*/

#include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info_dummy,Mat *fact)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"No support for numerical factorization for the mpidense matrix type");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info_dummy,Mat *fact)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"No support for numerical factorization for the mpidense matrix type");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDenseGetLocalMatrix"
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
              matrix that holds the local part of the operator. For a sequential matrix it
              returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscTruth     flg;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (!flg) {  /* this type check is fragile and should be replaced by a proper query */
    ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
      if (size == 1) flg = PETSC_FALSE;
    }
  }
  if (flg) {
    *B = mat->A;
  } else {
    *B = A;
  }
  PetscFunctionReturn(0);
}

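/*
   Usage sketch (illustrative only, not compiled here): a caller that wants direct
   access to the locally stored entries can combine MatDenseGetLocalMatrix() with
   MatGetArray(), both of which appear in this file. The variable names below are
   hypothetical.

     Mat         A,Aloc;
     PetscScalar *v;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
     ... operate on the column-major local array v ...
     ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
*/
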
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIDense"
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIDense"
PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
  if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->m,rstart = mdn->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

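/*
   Usage sketch (illustrative, assuming the public MatGetDiagonalBlock() wrapper
   with the same argument list as the routine composed above): extract the square
   diagonal block owned by this process.

     Mat        B;
     PetscTruth iscopy;

     ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ... use the sequential dense block B ...
     if (iscopy) {ierr = MatDestroy(B);CHKERRQ(ierr);}
*/
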
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

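/*
   A minimal sketch of the calling pattern this routine supports: any process may
   set any global entry, since rows outside [rstart,rend) are cached in mat->stash
   and shipped to their owners during assembly. The indices below are illustrative.

     PetscInt    i = 0,j = 0;
     PetscScalar val = 1.0;

     ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
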
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetArray_MPIDense"
PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution is currently supported! We should really check each
     index set to confirm that it is OK; currently only submatrices with the same
     row partitioning as the original matrix are supported. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test the row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, one column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

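/*
   Why the MPI_BOR reduction above detects mixed modes: with the InsertMode values
   PETSc uses (NOT_SET_VALUES = 0, INSERT_VALUES = 1, ADD_VALUES = 2), a bitwise OR
   over all processes yields

     0 | 1 = 1   some processes inserted, the rest set nothing   -> INSERT_VALUES
     0 | 2 = 2   some processes added, the rest set nothing      -> ADD_VALUES
     1 | 2 = 3   inserts and adds were mixed                     -> ADD_VALUES|INSERT_VALUES, an error

   so a single reduction both validates the modes and broadcasts the common one.
*/
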
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIDense"
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* This code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

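/*
   Usage sketch (illustrative): each process may pass any global row numbers; the
   communication above routes every index to its owning process before the local
   MatZeroRows() is applied. The rows below are hypothetical.

     PetscInt    rows[3] = {0,5,17};
     PetscScalar diag    = 1.0;

     ierr = MatZeroRows(A,3,rows,diag);CHKERRQ(ierr);
*/
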
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIDense"
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

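/*
   Data flow of the multiply above, written out for an M x N matrix on p processes:
   each process stores an (m_local x N) horizontal slice, so y = A*x needs the
   entire input vector locally. The scatter gathers all of x into the sequential
   work vector lvec, after which the product is a purely local dense multiply:

     y_local = A_local * lvec,   where lvec = [x_0; x_1; ...; x_{p-1}]
*/
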
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIDense"
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIDense"
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIDense"
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->m,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->m,a->A->n);
  radd = a->rstart*m;
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  if (mdn->factor) {
    if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
    if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
    if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = mdn->size,rank = mdn->rank;
  PetscViewerType   vtype;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->m,
                   (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;
    row = mdn->rstart; m = mdn->A->m;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense"
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->M;
  info->columns_global = (double)A->N;
  info->rows_local     = (double)A->m;
  info->columns_local  = (double)A->N;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIDense"
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscScalar    *l,*r,x,*v;
  PetscErrorCode ierr;
  PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
    ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size");
    ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;
      for (j=0; j<m; j++) { (*v++) *= x;}
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

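/*
   Usage sketch (illustrative): scale A from the left by diag(l) and from the right
   by diag(r), i.e. form diag(l)*A*diag(r). Here l must be distributed like the
   rows of A and r like its columns; the vectors l and r are hypothetical.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/
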
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->n*mdn->A->m; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->N;
      ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->n; j++) {
        for (i=0; i<mdn->A->m; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
  }
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);

  m = a->A->m; n = a->A->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIDense"
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  BLASscal_(&nz,&oalpha,a->v,&one);
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       0,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense
M*/

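/*
   A minimal creation sketch for this type, using only calls that appear in this
   file (M and N are hypothetical global dimensions):

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/
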
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
  a->nvec = mat->n;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
  a->cowners = a->rowners + a->size + 1;
  ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->rowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->rowners[i] += a->rowners[i-1];
  }
  a->rstart = a->rowners[a->rank];
  a->rend   = a->rowners[a->rank+1];
  ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
  a->cowners[0] = 0;
  for (i=2; i<=a->size; i++) {
    a->cowners[i] += a->cowners[i-1];
  }

  /* build cache for off-array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix-vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_Dense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  A - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

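/*
   Sketch of the user-supplied storage path (illustrative; mlocal and N are
   hypothetical local/global sizes): the array must hold the local rows of all
   N columns in Fortran column-major order, i.e. mlocal*N scalars with leading
   dimension mlocal. The user keeps ownership of the array and frees it after
   the matrix is destroyed.

     PetscScalar *v;

     ierr = PetscMalloc(mlocal*N*sizeof(PetscScalar),&v);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,v);CHKERRQ(ierr);
*/
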
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

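/*
   Usage sketch (illustrative; M and N are hypothetical global dimensions):
   create an M x N dense matrix, letting PETSc pick both the row distribution
   and the storage.

     Mat A;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/
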
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->nvec         = oldmat->nvec;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into the matrix row by row (this is why we cannot read directly into the array) */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle the case where the matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}