xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f68b968ce39302dfa79eb1a6cfa1998ce074e829)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
36               matrix that holds this process's portion of the operator; a MATSEQDENSE matrix is returned as-is.
37 
38     Input Parameter:
39 .      A - the Seq or MPI dense matrix
40 
41     Output Parameter:
42 .      B - the inner matrix
43 
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56   if (!flg) {  /* fragile check: MATDENSE resolves to either the sequential or the parallel type */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
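
/* An illustrative sketch (not part of this file) of how the routine above is
   typically used: obtain the local dense block so that sequential-only
   operations can be applied to it. */
#if 0
static PetscErrorCode ExampleLocalNorm(Mat A,PetscReal *nrm)
{
  Mat            Aloc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
  ierr = MatNorm(Aloc,NORM_FROBENIUS,nrm);CHKERRQ(ierr); /* norm of the local block only */
  PetscFunctionReturn(0);
}
#endif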
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = mat->rstart,rend = mat->rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
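
/* A minimal sketch (hypothetical caller, not part of PETSc) of the required
   MatGetRow()/MatRestoreRow() pairing; note the routines above support only
   locally owned rows. */
#if 0
static PetscErrorCode ExampleRowSum(Mat A,PetscInt row,PetscScalar *sum)
{
  PetscErrorCode    ierr;
  PetscInt          nz,j;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  *sum = 0.0;
  for (j=0; j<nz; j++) *sum += vals[j];
  ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr); /* frees the work arrays allocated by MatGetRow_MPIDense() */
  PetscFunctionReturn(0);
}
#endif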
97 
98 EXTERN_C_BEGIN
99 #undef __FUNCT__
100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102 {
103   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       m = A->m,rstart = mdn->rstart;
106   PetscScalar    *array;
107   MPI_Comm       comm;
108 
109   PetscFunctionBegin;
110   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
111 
112   /* The reuse aspect is not implemented efficiently */
113   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
114 
115   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
116   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
117   ierr = MatCreate(comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
119   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
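  /* The local block is stored column-major with leading dimension m, so the
     m-by-m diagonal block (global columns rstart .. rstart+m-1) is the
     contiguous range of m*m entries starting at array+m*rstart; the matrix
     created below aliases that storage rather than copying it. */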
120   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
121   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
122   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 
125   *iscopy = PETSC_TRUE;
126   PetscFunctionReturn(0);
127 }
128 EXTERN_C_END
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetValues_MPIDense"
132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133 {
134   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
135   PetscErrorCode ierr;
136   PetscInt       i,j,rstart = A->rstart,rend = A->rend,row;
137   PetscTruth     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue;
142     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
147       } else {
148         for (j=0; j<n; j++) {
149           if (idxn[j] < 0) continue;
150           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
152         }
153       }
154     } else {
155       if (!A->donotstash) {
156         if (roworiented) {
157           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
158         } else {
159           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
160         }
161       }
162     }
163   }
164   PetscFunctionReturn(0);
165 }
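
/* A short sketch (illustrative only) of what the stashing above enables: any
   process may set any global entry, and off-process values are communicated
   to their owners during assembly. */
#if 0
static PetscErrorCode ExampleSetAnywhere(Mat A,PetscInt i,PetscInt j,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* stashed if row i is not local */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* ships stashed entries */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif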
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mdn->rstart,rend = mdn->rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
178     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
183         if (idxn[j] >= mat->N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210 {
211   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
213   PetscErrorCode ierr;
214   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215   PetscScalar    *av,*bv,*v = lmat->v;
216   Mat            newmat;
217 
218   PetscFunctionBegin;
219   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
220   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
221   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
222   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
223 
224   /* No parallel redistribution currently supported! Should really check each index set
225      to confirm that it is OK.  ... Currently supports only a submatrix with the same
226      partitioning as the original matrix! */
227 
228   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
230 
231   /* Check submatrix call */
232   if (scall == MAT_REUSE_MATRIX) {
233     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234     /* Really need to test the row and column sizes! */
235     newmat = *B;
236   } else {
237     /* Create and fill new matrix */
238     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
239     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
240     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
241     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
242   }
243 
244   /* Now extract the data pointers and do the copy, column at a time */
245   newmatd = (Mat_MPIDense*)newmat->data;
246   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247 
248   for (i=0; i<ncols; i++) {
249     av = v + nlrows*icol[i];
250     for (j=0; j<nrows; j++) {
251       *bv++ = av[irow[j] - rstart];
252     }
253   }
254 
255   /* Assemble the matrices so that the correct flags are set */
256   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258 
259   /* Free work space */
260   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
261   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
262   *B = newmat;
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279   MPI_Comm       comm = mat->comm;
280   PetscErrorCode ierr;
281   PetscInt       nstash,reallocs;
282   InsertMode     addv;
283 
284   PetscFunctionBegin;
285   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
286   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
287   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289   }
290   mat->insertmode = addv; /* in case this processor had no cache */
291 
292   ierr = MatStashScatterBegin_Private(&mat->stash,mdn->rowners);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscLogInfo((mdn->A,"MatAssemblyBegin_MPIDense:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303   PetscErrorCode  ierr;
304   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305   PetscMPIInt     n;
306   PetscScalar     *val;
307   InsertMode      addv=mat->insertmode;
308 
309   PetscFunctionBegin;
310   /*  wait on receives */
311   while (1) {
312     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
313     if (!flg) break;
314 
315     for (i=0; i<n;) {
316       /* Now identify the consecutive vals belonging to the same row */
317       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318       if (j < n) ncols = j-i;
319       else       ncols = n-i;
320       /* Now assemble all these values with a single function call */
321       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
322       i = j;
323     }
324   }
325   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326 
327   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* The code does not handle the diagonal entries correctly unless the
349    matrix is square and the column and row ownerships are identical.
350    This is a BUG. The only way to fix it seems to be to access
351    mdn->A and mdn->B directly and not through the MatZeroRows()
352    routine.
353 */
354 #undef __FUNCT__
355 #define __FUNCT__ "MatZeroRows_MPIDense"
356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357 {
358   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
359   PetscErrorCode ierr;
360   PetscInt       i,*owners = l->rowners;
361   PetscInt       *nprocs,j,idx,nsends;
362   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
364   PetscInt       *lens,*lrows,*values;
365   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366   MPI_Comm       comm = A->comm;
367   MPI_Request    *send_waits,*recv_waits;
368   MPI_Status     recv_status,*send_status;
369   PetscTruth     found;
370 
371   PetscFunctionBegin;
372   /*  first count number of contributors to each processor */
373   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
374   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
376   for (i=0; i<N; i++) {
377     idx = rows[i];
378     found = PETSC_FALSE;
379     for (j=0; j<size; j++) {
380       if (idx >= owners[j] && idx < owners[j+1]) {
381         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382       }
383     }
384     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385   }
386   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
387 
388   /* inform other processors of number of messages and max length*/
389   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
390 
391   /* post receives:   */
392   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
394   for (i=0; i<nrecvs; i++) {
395     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
396   }
397 
398   /* do sends:
399       1) starts[i] gives the starting index in svalues for stuff going to
400          the ith processor
401   */
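  /* For example, with size = 3 and nprocs[2*i] = {2,0,1}, starts below becomes
     {0,2,2}: the two rows destined for process 0 are packed into svalues[0..1]
     and the single row for process 2 into svalues[2]. */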
402   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
403   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
404   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
405   starts[0]  = 0;
406   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   for (i=0; i<N; i++) {
408     svalues[starts[owner[i]]++] = rows[i];
409   }
410 
411   starts[0] = 0;
412   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413   count = 0;
414   for (i=0; i<size; i++) {
415     if (nprocs[2*i+1]) {
416       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
417     }
418   }
419   ierr = PetscFree(starts);CHKERRQ(ierr);
420 
421   base = owners[rank];
422 
423   /*  wait on receives */
424   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
425   source = lens + nrecvs;
426   count  = nrecvs; slen = 0;
427   while (count) {
428     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
429     /* unpack receives into our local space */
430     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
431     source[imdex]  = recv_status.MPI_SOURCE;
432     lens[imdex]    = n;
433     slen += n;
434     count--;
435   }
436   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
437 
438   /* move the data into the send scatter */
439   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
440   count = 0;
441   for (i=0; i<nrecvs; i++) {
442     values = rvalues + i*nmax;
443     for (j=0; j<lens[i]; j++) {
444       lrows[count++] = values[j] - base;
445     }
446   }
447   ierr = PetscFree(rvalues);CHKERRQ(ierr);
448   ierr = PetscFree(lens);CHKERRQ(ierr);
449   ierr = PetscFree(owner);CHKERRQ(ierr);
450   ierr = PetscFree(nprocs);CHKERRQ(ierr);
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
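  /* Gather the needed entries of the distributed input vector into the local
     work vector lvec, then apply the local m x N dense block. */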
476   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
477   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
491   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->m,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->m,a->A->n);
543   radd = a->rstart*m;
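  /* Column-major storage with leading dimension m: the diagonal entry of local
     row i lies in global column rstart+i, i.e. at offset (rstart+i)*m + i =
     radd + i*m + i. */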
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 
558   PetscFunctionBegin;
559 
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = PetscFree(mdn->rowners);CHKERRQ(ierr);
565   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
566   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
567   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
568   if (mdn->factor) {
569     if (mdn->factor->temp)   {ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);}
570     if (mdn->factor->tag)    {ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);}
571     if (mdn->factor->pivots) {ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);}
572     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
573   }
574   ierr = PetscFree(mdn);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatView_MPIDense_Binary"
582 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
583 {
584   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
585   PetscErrorCode ierr;
586 
587   PetscFunctionBegin;
588   if (mdn->size == 1) {
589     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
590   }
591   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
597 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
598 {
599   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
600   PetscErrorCode    ierr;
601   PetscMPIInt       size = mdn->size,rank = mdn->rank;
602   PetscViewerType   vtype;
603   PetscTruth        iascii,isdraw;
604   PetscViewer       sviewer;
605   PetscViewerFormat format;
606 
607   PetscFunctionBegin;
608   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
609   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
610   if (iascii) {
611     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
612     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
613     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
614       MatInfo info;
615       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
616       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz allocated %D mem %D\n",rank,mat->m,
617                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
618       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
619       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
620       PetscFunctionReturn(0);
621     } else if (format == PETSC_VIEWER_ASCII_INFO) {
622       PetscFunctionReturn(0);
623     }
624   } else if (isdraw) {
625     PetscDraw       draw;
626     PetscTruth isnull;
627 
628     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
629     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
630     if (isnull) PetscFunctionReturn(0);
631   }
632 
633   if (size == 1) {
634     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
635   } else {
636     /* assemble the entire matrix onto first processor. */
637     Mat         A;
638     PetscInt    M = mat->M,N = mat->N,m,row,i,nz;
639     PetscInt    *cols;
640     PetscScalar *vals;
641 
642     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
643     if (!rank) {
644       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
645     } else {
646       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
647     }
648     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
649     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
650     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
651     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
652 
653     /* Copy the matrix ... This isn't the most efficient means,
654        but it's quick for now */
655     A->insertmode = INSERT_VALUES;
656     row = mdn->rstart; m = mdn->A->m;
657     for (i=0; i<m; i++) {
658       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
659       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
660       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
661       row++;
662     }
663 
664     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
665     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
666     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
667     if (!rank) {
668       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
669     }
670     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
671     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
672     ierr = MatDestroy(A);CHKERRQ(ierr);
673   }
674   PetscFunctionReturn(0);
675 }
676 
677 #undef __FUNCT__
678 #define __FUNCT__ "MatView_MPIDense"
679 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
680 {
681   PetscErrorCode ierr;
682   PetscTruth     iascii,isbinary,isdraw,issocket;
683 
684   PetscFunctionBegin;
685 
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
687   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
688   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
689   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
690 
691   if (iascii || issocket || isdraw) {
692     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
693   } else if (isbinary) {
694     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
695   } else {
696     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 #undef __FUNCT__
702 #define __FUNCT__ "MatGetInfo_MPIDense"
703 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
704 {
705   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
706   Mat            mdn = mat->A;
707   PetscErrorCode ierr;
708   PetscReal      isend[5],irecv[5];
709 
710   PetscFunctionBegin;
711   info->rows_global    = (double)A->M;
712   info->columns_global = (double)A->N;
713   info->rows_local     = (double)A->m;
714   info->columns_local  = (double)A->N;
715   info->block_size     = 1.0;
716   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
717   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
718   isend[3] = info->memory;  isend[4] = info->mallocs;
719   if (flag == MAT_LOCAL) {
720     info->nz_used      = isend[0];
721     info->nz_allocated = isend[1];
722     info->nz_unneeded  = isend[2];
723     info->memory       = isend[3];
724     info->mallocs      = isend[4];
725   } else if (flag == MAT_GLOBAL_MAX) {
726     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
727     info->nz_used      = irecv[0];
728     info->nz_allocated = irecv[1];
729     info->nz_unneeded  = irecv[2];
730     info->memory       = irecv[3];
731     info->mallocs      = irecv[4];
732   } else if (flag == MAT_GLOBAL_SUM) {
733     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
734     info->nz_used      = irecv[0];
735     info->nz_allocated = irecv[1];
736     info->nz_unneeded  = irecv[2];
737     info->memory       = irecv[3];
738     info->mallocs      = irecv[4];
739   }
740   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
741   info->fill_ratio_needed = 0;
742   info->factor_mallocs    = 0;
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatSetOption_MPIDense"
748 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
749 {
750   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   switch (op) {
755   case MAT_NO_NEW_NONZERO_LOCATIONS:
756   case MAT_YES_NEW_NONZERO_LOCATIONS:
757   case MAT_NEW_NONZERO_LOCATION_ERR:
758   case MAT_NEW_NONZERO_ALLOCATION_ERR:
759   case MAT_COLUMNS_SORTED:
760   case MAT_COLUMNS_UNSORTED:
761     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
762     break;
763   case MAT_ROW_ORIENTED:
764     a->roworiented = PETSC_TRUE;
765     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
766     break;
767   case MAT_ROWS_SORTED:
768   case MAT_ROWS_UNSORTED:
769   case MAT_YES_NEW_DIAGONALS:
770   case MAT_USE_HASH_TABLE:
771     ierr = PetscLogInfo((A,"MatSetOption_MPIDense:Option ignored\n"));CHKERRQ(ierr);
772     break;
773   case MAT_COLUMN_ORIENTED:
774     a->roworiented = PETSC_FALSE;
775     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
776     break;
777   case MAT_IGNORE_OFF_PROC_ENTRIES:
778     a->donotstash = PETSC_TRUE;
779     break;
780   case MAT_NO_NEW_DIAGONALS:
781     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
782   case MAT_SYMMETRIC:
783   case MAT_STRUCTURALLY_SYMMETRIC:
784   case MAT_NOT_SYMMETRIC:
785   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
786   case MAT_HERMITIAN:
787   case MAT_NOT_HERMITIAN:
788   case MAT_SYMMETRY_ETERNAL:
789   case MAT_NOT_SYMMETRY_ETERNAL:
790     break;
791   default:
792     SETERRQ(PETSC_ERR_SUP,"unknown option");
793   }
794   PetscFunctionReturn(0);
795 }
796 
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatDiagonalScale_MPIDense"
800 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
801 {
802   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
803   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
804   PetscScalar    *l,*r,x,*v;
805   PetscErrorCode ierr;
806   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
807 
808   PetscFunctionBegin;
809   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
810   if (ll) {
811     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
812     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
813     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
814     for (i=0; i<m; i++) {
815       x = l[i];
816       v = mat->v + i;
817       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
818     }
819     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
820     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
821   }
822   if (rr) {
823     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
824     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
825     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
826     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
827     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
828     for (i=0; i<n; i++) {
829       x = r[i];
830       v = mat->v + i*m;
831       for (j=0; j<m; j++) { (*v++) *= x;}
832     }
833     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
834     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 #undef __FUNCT__
840 #define __FUNCT__ "MatNorm_MPIDense"
841 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
842 {
843   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
844   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
845   PetscErrorCode ierr;
846   PetscInt       i,j;
847   PetscReal      sum = 0.0;
848   PetscScalar    *v = mat->v;
849 
850   PetscFunctionBegin;
851   if (mdn->size == 1) {
852     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
853   } else {
854     if (type == NORM_FROBENIUS) {
855       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
856 #if defined(PETSC_USE_COMPLEX)
857         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
858 #else
859         sum += (*v)*(*v); v++;
860 #endif
861       }
862       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
863       *nrm = sqrt(*nrm);
864       ierr = PetscLogFlops(2*mdn->A->n*mdn->A->m);CHKERRQ(ierr);
865     } else if (type == NORM_1) {
866       PetscReal *tmp,*tmp2;
867       ierr = PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
868       tmp2 = tmp + A->N;
869       ierr = PetscMemzero(tmp,2*A->N*sizeof(PetscReal));CHKERRQ(ierr);
870       *nrm = 0.0;
871       v = mat->v;
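      /* Accumulate the local column sums in tmp[]; the reduction below forms
         the global column sums, whose maximum is the 1-norm. */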
872       for (j=0; j<mdn->A->n; j++) {
873         for (i=0; i<mdn->A->m; i++) {
874           tmp[j] += PetscAbsScalar(*v);  v++;
875         }
876       }
877       ierr = MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
878       for (j=0; j<A->N; j++) {
879         if (tmp2[j] > *nrm) *nrm = tmp2[j];
880       }
881       ierr = PetscFree(tmp);CHKERRQ(ierr);
882       ierr = PetscLogFlops(A->n*A->m);CHKERRQ(ierr);
883     } else if (type == NORM_INFINITY) { /* max row norm */
884       PetscReal ntemp;
885       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
886       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
887     } else {
888       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
889     }
890   }
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatTranspose_MPIDense"
896 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
897 {
898   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
899   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
900   Mat            B;
901   PetscInt       M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
902   PetscErrorCode ierr;
903   PetscInt       j,i;
904   PetscScalar    *v;
905 
906   PetscFunctionBegin;
907   if (!matout && M != N) {
908     SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
909   }
910   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
911   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
912   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
913   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
914 
915   m = a->A->m; n = a->A->n; v = Aloc->v;
916   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
917   for (i=0; i<m; i++) rwork[i] = rstart + i; /* global row indices of the local rows; they do not change with j */
918   for (j=0; j<n; j++) {
919     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
920     v   += m;
921   }
922   ierr = PetscFree(rwork);CHKERRQ(ierr);
923   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
924   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
925   if (matout) {
926     *matout = B;
927   } else {
928     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 #include "petscblaslapack.h"
934 #undef __FUNCT__
935 #define __FUNCT__ "MatScale_MPIDense"
936 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
937 {
938   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
939   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
940   PetscScalar    oalpha = alpha;
941   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->m*inA->N;
942   PetscErrorCode ierr;
943 
944   PetscFunctionBegin;
945   BLASscal_(&nz,&oalpha,a->v,&one);
946   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
947   PetscFunctionReturn(0);
948 }
949 
950 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
954 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
955 {
956   PetscErrorCode ierr;
957 
958   PetscFunctionBegin;
959   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
960   PetscFunctionReturn(0);
961 }
962 
963 /* -------------------------------------------------------------------*/
964 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
965        MatGetRow_MPIDense,
966        MatRestoreRow_MPIDense,
967        MatMult_MPIDense,
968 /* 4*/ MatMultAdd_MPIDense,
969        MatMultTranspose_MPIDense,
970        MatMultTransposeAdd_MPIDense,
971        0,
972        0,
973        0,
974 /*10*/ 0,
975        0,
976        0,
977        0,
978        MatTranspose_MPIDense,
979 /*15*/ MatGetInfo_MPIDense,
980        0,
981        MatGetDiagonal_MPIDense,
982        MatDiagonalScale_MPIDense,
983        MatNorm_MPIDense,
984 /*20*/ MatAssemblyBegin_MPIDense,
985        MatAssemblyEnd_MPIDense,
986        0,
987        MatSetOption_MPIDense,
988        MatZeroEntries_MPIDense,
989 /*25*/ MatZeroRows_MPIDense,
990        MatLUFactorSymbolic_MPIDense,
991        0,
992        MatCholeskyFactorSymbolic_MPIDense,
993        0,
994 /*30*/ MatSetUpPreallocation_MPIDense,
995        0,
996        0,
997        MatGetArray_MPIDense,
998        MatRestoreArray_MPIDense,
999 /*35*/ MatDuplicate_MPIDense,
1000        0,
1001        0,
1002        0,
1003        0,
1004 /*40*/ 0,
1005        MatGetSubMatrices_MPIDense,
1006        0,
1007        MatGetValues_MPIDense,
1008        0,
1009 /*45*/ 0,
1010        MatScale_MPIDense,
1011        0,
1012        0,
1013        0,
1014 /*50*/ 0,
1015        0,
1016        0,
1017        0,
1018        0,
1019 /*55*/ 0,
1020        0,
1021        0,
1022        0,
1023        0,
1024 /*60*/ MatGetSubMatrix_MPIDense,
1025        MatDestroy_MPIDense,
1026        MatView_MPIDense,
1027        MatGetPetscMaps_Petsc,
1028        0,
1029 /*65*/ 0,
1030        0,
1031        0,
1032        0,
1033        0,
1034 /*70*/ 0,
1035        0,
1036        0,
1037        0,
1038        0,
1039 /*75*/ 0,
1040        0,
1041        0,
1042        0,
1043        0,
1044 /*80*/ 0,
1045        0,
1046        0,
1047        0,
1048 /*84*/ MatLoad_MPIDense,
1049        0,
1050        0,
1051        0,
1052        0,
1053        0,
1054 /*90*/ 0,
1055        0,
1056        0,
1057        0,
1058        0,
1059 /*95*/ 0,
1060        0,
1061        0,
1062        0};
1063 
1064 EXTERN_C_BEGIN
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1067 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1068 {
1069   Mat_MPIDense   *a;
1070   PetscErrorCode ierr;
1071 
1072   PetscFunctionBegin;
1073   mat->preallocated = PETSC_TRUE;
1074   /* Note:  For now, when data is specified, this assumes the user has correctly
1075      allocated the local dense storage space.  We should add error checking. */
1076 
1077   a    = (Mat_MPIDense*)mat->data;
1078   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1079   ierr = MatSetSizes(a->A,mat->m,mat->N,mat->m,mat->N);CHKERRQ(ierr);
1080   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1081   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1082   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1083   PetscFunctionReturn(0);
1084 }
1085 EXTERN_C_END
1086 
1087 /*MC
1088    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1089 
1090    Options Database Keys:
1091 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1092 
1093   Level: beginner
1094 
1095 .seealso: MatCreateMPIDense
1096 M*/
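
/* A minimal usage sketch (illustrative only, not part of this file): create a
   matrix and let the options database select this type.  The sizes are
   hypothetical. */
#if 0
static PetscErrorCode ExampleCreateFromOptions(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr); /* honors -mat_type mpidense */
  PetscFunctionReturn(0);
}
#endif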
1097 
1098 EXTERN_C_BEGIN
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatCreate_MPIDense"
1101 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1102 {
1103   Mat_MPIDense   *a;
1104   PetscErrorCode ierr;
1105   PetscInt       i;
1106 
1107   PetscFunctionBegin;
1108   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1109   mat->data         = (void*)a;
1110   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1111   mat->factor       = 0;
1112   mat->mapping      = 0;
1113 
1114   a->factor       = 0;
1115   mat->insertmode = NOT_SET_VALUES;
1116   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1117   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1118 
1119   ierr = PetscSplitOwnership(mat->comm,&mat->m,&mat->M);CHKERRQ(ierr);
1120   ierr = PetscSplitOwnership(mat->comm,&mat->n,&mat->N);CHKERRQ(ierr);
1121   a->nvec = mat->n;
1122 
1123   /* the information in the maps duplicates the information computed below, eventually
1124      we should remove the duplicate information that is not contained in the maps */
1125   ierr = PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);CHKERRQ(ierr);
1126   ierr = PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);CHKERRQ(ierr);
1127 
1128   /* build local table of row and column ownerships */
1129   ierr       = PetscMalloc(2*(a->size+2)*sizeof(PetscInt),&a->rowners);CHKERRQ(ierr);
1130   a->cowners = a->rowners + a->size + 1;
1131   ierr = PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1132   ierr = MPI_Allgather(&mat->m,1,MPIU_INT,a->rowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1133   a->rowners[0] = 0;
1134   for (i=2; i<=a->size; i++) {
1135     a->rowners[i] += a->rowners[i-1];
1136   }
1137   a->rstart = a->rowners[a->rank];
1138   a->rend   = a->rowners[a->rank+1];
1139   ierr      = MPI_Allgather(&mat->n,1,MPIU_INT,a->cowners+1,1,MPIU_INT,mat->comm);CHKERRQ(ierr);
1140   a->cowners[0] = 0;
1141   for (i=2; i<=a->size; i++) {
1142     a->cowners[i] += a->cowners[i-1];
1143   }
1144 
1145   /* build cache for off-process entries generated during MatSetValues() */
1146   a->donotstash = PETSC_FALSE;
1147   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1148 
1149   /* stuff used for matrix vector multiply */
1150   a->lvec        = 0;
1151   a->Mvctx       = 0;
1152   a->roworiented = PETSC_TRUE;
1153 
1154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1155                                      "MatGetDiagonalBlock_MPIDense",
1156                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1158                                      "MatMPIDenseSetPreallocation_MPIDense",
1159                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 EXTERN_C_END
1163 
1164 /*MC
1165    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1166 
1167    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1168    and MATMPIDENSE otherwise.
1169 
1170    Options Database Keys:
1171 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1172 
1173   Level: beginner
1174 
1175 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1176 M*/
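
/* A brief sketch of the dispatch described above (code fragment, illustrative
   only): setting the type to MATDENSE resolves to MATSEQDENSE on one process
   and to MATMPIDENSE otherwise; see MatCreate_Dense() below. */
#if 0
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
  ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr); /* becomes "seqdense" or "mpidense" */
#endif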
1177 
1178 EXTERN_C_BEGIN
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatCreate_Dense"
1181 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1182 {
1183   PetscErrorCode ierr;
1184   PetscMPIInt    size;
1185 
1186   PetscFunctionBegin;
1187   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1188   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1189   if (size == 1) {
1190     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1191   } else {
1192     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1193   }
1194   PetscFunctionReturn(0);
1195 }
1196 EXTERN_C_END
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1200 /*@C
1201    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1202 
1203    Not collective
1204 
1205    Input Parameters:
1206 +  A - the matrix
1207 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1208    to control all matrix memory allocation.
1209 
1210    Notes:
1211    The dense format is fully compatible with standard Fortran 77
1212    storage by columns.
1213 
1214    The data input variable is intended primarily for Fortran programmers
1215    who wish to allocate their own matrix memory space.  Most users should
1216    set data=PETSC_NULL.
1217 
1218    Level: intermediate
1219 
1220 .keywords: matrix,dense, parallel
1221 
1222 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1223 @*/
1224 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1225 {
1226   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1227 
1228   PetscFunctionBegin;
1229   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1230   if (f) {
1231     ierr = (*f)(mat,data);CHKERRQ(ierr);
1232   }
1233   PetscFunctionReturn(0);
1234 }
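
/* An illustrative sketch (hypothetical caller, not part of PETSc) of supplying
   user-allocated storage, the case the data argument exists for.  The array
   must hold the local m-by-N block in column-major (Fortran) order. */
#if 0
static PetscErrorCode ExamplePreallocate(MPI_Comm comm,PetscInt m,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;
  PetscScalar    *storage;

  PetscFunctionBegin;
  ierr = PetscMalloc(m*N*sizeof(PetscScalar),&storage);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,storage);CHKERRQ(ierr); /* aliases storage; no copy */
  PetscFunctionReturn(0);
}
#endif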
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatCreateMPIDense"
1238 /*@C
1239    MatCreateMPIDense - Creates a parallel matrix in dense format.
1240 
1241    Collective on MPI_Comm
1242 
1243    Input Parameters:
1244 +  comm - MPI communicator
1245 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1246 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1247 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1248 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1249 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1250    to control all matrix memory allocation.
1251 
1252    Output Parameter:
1253 .  A - the matrix
1254 
1255    Notes:
1256    The dense format is fully compatible with standard Fortran 77
1257    storage by columns.
1258 
1259    The data input variable is intended primarily for Fortran programmers
1260    who wish to allocate their own matrix memory space.  Most users should
1261    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1262 
1263    The user MUST specify either the local or global matrix dimensions
1264    (possibly both).
1265 
1266    Level: intermediate
1267 
1268 .keywords: matrix,dense, parallel
1269 
1270 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1271 @*/
1272 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1273 {
1274   PetscErrorCode ierr;
1275   PetscMPIInt    size;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1279   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1280   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1281   if (size > 1) {
1282     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1283     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1284   } else {
1285     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1286     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1287   }
1288   PetscFunctionReturn(0);
1289 }
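
/* A minimal usage sketch of the convenience routine above (illustrative only);
   PETSC_DECIDE lets PETSc choose the row distribution from the global sizes. */
#if 0
  Mat A;
  ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
  /* ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ... */
  ierr = MatDestroy(A);CHKERRQ(ierr);
#endif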
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatDuplicate_MPIDense"
1293 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1294 {
1295   Mat            mat;
1296   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1297   PetscErrorCode ierr;
1298 
1299   PetscFunctionBegin;
1300   *newmat       = 0;
1301   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1302   ierr = MatSetSizes(mat,A->m,A->n,A->M,A->N);CHKERRQ(ierr);
1303   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1304   a                 = (Mat_MPIDense*)mat->data;
1305   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1306   mat->factor       = A->factor;
1307   mat->assembled    = PETSC_TRUE;
1308   mat->preallocated = PETSC_TRUE;
1309 
1310   a->rstart       = oldmat->rstart;
1311   a->rend         = oldmat->rend;
1312   a->size         = oldmat->size;
1313   a->rank         = oldmat->rank;
1314   mat->insertmode = NOT_SET_VALUES;
1315   a->nvec         = oldmat->nvec;
1316   a->donotstash   = oldmat->donotstash;
1317 
1318   ierr = PetscLogObjectMemory(mat,(a->size+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));CHKERRQ(ierr);
1319   ierr = PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1320   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1321 
1322   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1323   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1324   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1325   *newmat = mat;
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 #include "petscsys.h"
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1333 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1334 {
1335   PetscErrorCode ierr;
1336   PetscMPIInt    rank,size;
1337   PetscInt       *rowners,i,m,nz,j;
1338   PetscScalar    *array,*vals,*vals_ptr;
1339   MPI_Status     status;
1340 
1341   PetscFunctionBegin;
1342   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1344 
1345   /* determine ownership of all rows */
1346   m          = M/size + ((M % size) > rank);
1347   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1348   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1349   rowners[0] = 0;
1350   for (i=2; i<=size; i++) {
1351     rowners[i] += rowners[i-1];
1352   }
1353 
1354   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1355   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1356   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1357   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1358   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1359 
1360   if (!rank) {
1361     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1362 
1363     /* read in my part of the matrix numerical values  */
1364     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1365 
1366     /* insert into the matrix by row: the file stores each row contiguously, while array is column-major with leading dimension m, so we cannot read the file directly into array */
1367     vals_ptr = vals;
1368     for (i=0; i<m; i++) {
1369       for (j=0; j<N; j++) {
1370         array[i + j*m] = *vals_ptr++;
1371       }
1372     }
1373 
1374     /* read in other processors and ship out */
1375     for (i=1; i<size; i++) {
1376       nz   = (rowners[i+1] - rowners[i])*N;
1377       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1378       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1379     }
1380   } else {
1381     /* receive numeric values */
1382     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1383 
1384     /* receive message of values*/
1385     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1386 
1387     /* insert into the matrix by row: the file stores rows contiguously while array is column-major, so we cannot read directly into array */
1388     vals_ptr = vals;
1389     for (i=0; i<m; i++) {
1390       for (j=0; j<N; j++) {
1391         array[i + j*m] = *vals_ptr++;
1392       }
1393     }
1394   }
1395   ierr = PetscFree(rowners);CHKERRQ(ierr);
1396   ierr = PetscFree(vals);CHKERRQ(ierr);
1397   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1398   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatLoad_MPIDense"
1404 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1405 {
1406   Mat            A;
1407   PetscScalar    *vals,*svals;
1408   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1409   MPI_Status     status;
1410   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1411   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1412   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1413   PetscInt       i,nz,j,rstart,rend;
1414   int            fd;
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1419   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1420   if (!rank) {
1421     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1422     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1423     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1424   }
1425 
1426   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1427   M = header[1]; N = header[2]; nz = header[3];
1428 
1429   /*
1430        Handle case where matrix is stored on disk as a dense matrix
1431   */
1432   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1433     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1434     PetscFunctionReturn(0);
1435   }
1436 
1437   /* determine ownership of all rows */
1438   m          = M/size + ((M % size) > rank);
1439   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1440   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1441   rowners[0] = 0;
1442   for (i=2; i<=size; i++) {
1443     rowners[i] += rowners[i-1];
1444   }
1445   rstart = rowners[rank];
1446   rend   = rowners[rank+1];
1447 
1448   /* distribute row lengths to all processors */
1449   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1450   offlens = ourlens + (rend-rstart);
1451   if (!rank) {
1452     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1453     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1454     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1455     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1456     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1457     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1458   } else {
1459     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1460   }
1461 
1462   if (!rank) {
1463     /* calculate the number of nonzeros on each processor */
1464     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1465     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1466     for (i=0; i<size; i++) {
1467       for (j=rowners[i]; j< rowners[i+1]; j++) {
1468         procsnz[i] += rowlengths[j];
1469       }
1470     }
1471     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1472 
1473     /* determine max buffer needed and allocate it */
1474     maxnz = 0;
1475     for (i=0; i<size; i++) {
1476       maxnz = PetscMax(maxnz,procsnz[i]);
1477     }
1478     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1479 
1480     /* read in my part of the matrix column indices  */
1481     nz = procsnz[0];
1482     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1483     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1484 
1485     /* read in everyone else's parts and ship them off */
1486     for (i=1; i<size; i++) {
1487       nz   = procsnz[i];
1488       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1489       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1490     }
1491     ierr = PetscFree(cols);CHKERRQ(ierr);
1492   } else {
1493     /* determine buffer space needed for message */
1494     nz = 0;
1495     for (i=0; i<m; i++) {
1496       nz += ourlens[i];
1497     }
1498     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1499 
1500     /* receive message of column indices*/
1501     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1502     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1503     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1504   }
1505 
1506   /* loop over local rows, determining number of off diagonal entries */
1507   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1508   jj = 0;
1509   for (i=0; i<m; i++) {
1510     for (j=0; j<ourlens[i]; j++) {
1511       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1512       jj++;
1513     }
1514   }
1515 
1516   /* create our matrix */
1517   for (i=0; i<m; i++) {
1518     ourlens[i] -= offlens[i];
1519   }
1520   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1521   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1522   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1523   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1524   A = *newmat;
1525   for (i=0; i<m; i++) {
1526     ourlens[i] += offlens[i];
1527   }
1528 
1529   if (!rank) {
1530     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1531 
1532     /* read in my part of the matrix numerical values  */
1533     nz = procsnz[0];
1534     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1535 
1536     /* insert into matrix */
1537     jj      = rstart;
1538     smycols = mycols;
1539     svals   = vals;
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1542       smycols += ourlens[i];
1543       svals   += ourlens[i];
1544       jj++;
1545     }
1546 
1547     /* read in other processors and ship out */
1548     for (i=1; i<size; i++) {
1549       nz   = procsnz[i];
1550       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1551       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1552     }
1553     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1554   } else {
1555     /* receive numeric values */
1556     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1557 
1558     /* receive message of values*/
1559     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1560     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1561     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1562 
1563     /* insert into matrix */
1564     jj      = rstart;
1565     smycols = mycols;
1566     svals   = vals;
1567     for (i=0; i<m; i++) {
1568       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1569       smycols += ourlens[i];
1570       svals   += ourlens[i];
1571       jj++;
1572     }
1573   }
1574   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1575   ierr = PetscFree(vals);CHKERRQ(ierr);
1576   ierr = PetscFree(mycols);CHKERRQ(ierr);
1577   ierr = PetscFree(rowners);CHKERRQ(ierr);
1578 
1579   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1580   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
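
/* A usage sketch (illustrative only) for the loader above: open a binary
   viewer and load the matrix with a chosen type.  The file name is
   hypothetical. */
#if 0
  PetscViewer viewer;
  Mat         A;
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
#endif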
1583 