xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 290bbb0a1dcfb34dbf94efcfcc44171581b0efea)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
36               matrix that represents the operator. For sequential matrices it returns itself.
37 
38     Input Parameter:
39 .      A - the Seq or MPI dense matrix
40 
41     Output Parameter:
42 .      B - the inner matrix
43 
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56   if (!flg) {  /* this check sucks! */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
97 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   Returns in *B the square diagonal block of A owned by this process as a
   sequential dense matrix.  No values are copied: *B is built directly on
   top of A's local array, offset so its columns alias columns
   rstart..rstart+m-1 of the local rows.  *iscopy is set to PETSC_TRUE.
   Requires A to be globally square (checked below).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  /* create a sequential m x m matrix whose storage points into A's local array */
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
  /* m*rstart skips the first rstart columns; assumes column-major storage
     with leading dimension m — TODO confirm against Mat_SeqDense */
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
129 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/*
   Inserts or adds the m x n block of values v at global rows idxm[] and
   columns idxn[].  Locally owned rows go straight into the sequential
   matrix A->A; off-process rows are placed in the stash (unless
   donotstash is set) and are communicated during assembly.
   v is read row-major when roworiented, column-major otherwise.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;  /* negative rows are silently skipped */
    if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-major: row i of the block is contiguous at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;  /* negative columns skipped too */
          if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          /* column-major: entry (i,j) of the block lives at v[i+j*m] */
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: stash it for communication at assembly time */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
178     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
183         if (idxn[j] >= mat->cmap.N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
/*
   Extracts the submatrix A[isrow,iscol] into *B, copying directly between
   the column-major local arrays of the inner sequential matrices.  cs is
   the requested local column size of the result.  Only index sets that
   conform to A's existing row partitioning are supported (no parallel
   redistribution).
*/
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  for (i=0; i<ncols; i++) {
    /* av points at column icol[i] of the local array (column-major,
       leading dimension nlrows); bv advances contiguously through *B */
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
/*
   Begins assembly: checks that all processes agree on the InsertMode and
   starts communicating stashed off-process entries to their owners.
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = mat->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE;
     the bitwise OR equals ADD_VALUES|INSERT_VALUES exactly when the
     modes are mixed across the communicator */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start moving the stashed off-process entries to their owners */
  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
297 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   Completes assembly: drains the stash, inserting each received batch of
   values (grouped by row so that consecutive entries for the same row go
   in with one MatSetValues call), then assembles the inner sequential
   matrix and, on the first final assembly, builds the scatter context
   used by the multiply routines.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;  /* no more incoming messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      /* both branches yield j-i: when j==n, n-i equals j-i */
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/*
   Zeros the global rows listed in rows[] and sets their diagonal entry to
   diag.  Callers may list rows owned by other processes, so each index is
   first routed to its owner with a hand-rolled exchange, then the locally
   received rows are zapped via the sequential MatZeroRows().
   Layout of nprocs[]: nprocs[2*j] counts rows destined for process j,
   nprocs[2*j+1] is a 0/1 flag that any message goes to j at all.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear scan over the ownership ranges to find who owns idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack the row indices destined for each process contiguously */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* the packing loop above advanced starts[]; rebuild it before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];  /* first global row owned by this process */

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to a local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
477   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
491   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/*
   Copies the diagonal of A into the distributed vector v.  Each process
   reads its entries directly from the column-major local array: global
   diagonal entry rstart+i sits at local row i, global column rstart+i,
   i.e. at offset rstart*m + i*m + i.
   NOTE(review): this addressing assumes the row and column ownership
   ranges coincide — confirm for non-square layouts.
*/
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap.n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  /* number of diagonal entries stored locally */
  len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
  radd = A->rmap.rstart*m;  /* skip the first rstart columns */
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
550 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/*
   Destroys the parallel dense matrix: its stash, the inner sequential
   matrix, the scatter work vector and context, any factorization
   workspace, and the function pointers composed on the object.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
  /* release factorization workspace, if a factorization was ever done */
  if (mdn->factor) {
    ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
    ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
  }
  ierr = PetscFree(mdn);CHKERRQ(ierr);
  /* unregister the functions composed on this object at creation */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_MPIDense_Binary"
584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585 {
586   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
587   PetscErrorCode ierr;
588 
589   PetscFunctionBegin;
590   if (mdn->size == 1) {
591     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
592   }
593   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
599 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
600 {
601   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
602   PetscErrorCode    ierr;
603   PetscMPIInt       size = mdn->size,rank = mdn->rank;
604   PetscViewerType   vtype;
605   PetscTruth        iascii,isdraw;
606   PetscViewer       sviewer;
607   PetscViewerFormat format;
608 
609   PetscFunctionBegin;
610   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
611   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
612   if (iascii) {
613     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
614     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
615     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
616       MatInfo info;
617       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
618       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
619                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
620       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
621       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
622       PetscFunctionReturn(0);
623     } else if (format == PETSC_VIEWER_ASCII_INFO) {
624       PetscFunctionReturn(0);
625     }
626   } else if (isdraw) {
627     PetscDraw  draw;
628     PetscTruth isnull;
629 
630     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
631     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
632     if (isnull) PetscFunctionReturn(0);
633   }
634 
635   if (size == 1) {
636     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
637   } else {
638     /* assemble the entire matrix onto first processor. */
639     Mat         A;
640     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
641     PetscInt    *cols;
642     PetscScalar *vals;
643 
644     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
645     if (!rank) {
646       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
647     } else {
648       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
649     }
650     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
651     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
652     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
653     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
654 
655     /* Copy the matrix ... This isn't the most efficient means,
656        but it's quick for now */
657     A->insertmode = INSERT_VALUES;
658     row = mat->rmap.rstart; m = mdn->A->rmap.n;
659     for (i=0; i<m; i++) {
660       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
661       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
662       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
663       row++;
664     }
665 
666     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
667     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
668     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
669     if (!rank) {
670       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
671     }
672     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
673     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
674     ierr = MatDestroy(A);CHKERRQ(ierr);
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatView_MPIDense"
681 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
682 {
683   PetscErrorCode ierr;
684   PetscTruth     iascii,isbinary,isdraw,issocket;
685 
686   PetscFunctionBegin;
687 
688   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
689   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
690   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
691   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
692 
693   if (iascii || issocket || isdraw) {
694     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
695   } else if (isbinary) {
696     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
697   } else {
698     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
699   }
700   PetscFunctionReturn(0);
701 }
702 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
/*
   Gathers matrix statistics.  MAT_LOCAL reports this process's numbers;
   MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce the five size/memory counters
   over A's communicator.
   NOTE(review): the *_global/_local fields assigned below are followed by
   a MatGetInfo() call on the inner sequential matrix that fills the same
   struct — verify that call does not overwrite them with the local
   matrix's (smaller) dimensions.
*/
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->rows_global    = (double)A->rmap.N;
  info->columns_global = (double)A->cmap.N;
  info->rows_local     = (double)A->rmap.n;
  info->columns_local  = (double)A->cmap.N;
  info->block_size     = 1.0;
  /* query the inner sequential matrix for the counters to reduce */
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
747 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
/*
   Sets an option on the parallel dense matrix.  Options affecting value
   storage are forwarded to the inner sequential matrix; the orientation
   options additionally record the interpretation used by
   MatSetValues_MPIDense; hints with no meaning here are logged and
   ignored.
*/
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* forwarded unchanged to the local sequential matrix */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_COLUMNS_SORTED:
  case MAT_COLUMNS_UNSORTED:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  /* hints with no effect on the dense format */
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* makes MatSetValues_MPIDense drop off-process entries instead of stashing */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  /* symmetry flags carry no information for the dense format */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
799 
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatDiagonalScale_MPIDense"
803 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
804 {
805   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
806   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
807   PetscScalar    *l,*r,x,*v;
808   PetscErrorCode ierr;
809   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
810 
811   PetscFunctionBegin;
812   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
813   if (ll) {
814     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
815     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
816     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
817     for (i=0; i<m; i++) {
818       x = l[i];
819       v = mat->v + i;
820       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
821     }
822     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
823     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
824   }
825   if (rr) {
826     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
827     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
828     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
829     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
830     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
831     for (i=0; i<n; i++) {
832       x = r[i];
833       v = mat->v + i*m;
834       for (j=0; j<m; j++) { (*v++) *= x;}
835     }
836     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
837     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
838   }
839   PetscFunctionReturn(0);
840 }
841 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
/*
   MatNorm_MPIDense - Computes a norm of a parallel dense matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); the two-norm is not supported.  Each process reduces
   over its local column-major block, then partial results are combined
   with an MPI_Allreduce.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* single-process communicator: defer to the sequential implementation */
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block, then global sum and sqrt */
      for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* max column sum: rows are distributed, so each rank accumulates its
         local rows of every column into tmp, then the per-column partial
         sums are added across ranks into tmp2 before taking the max */
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap.N;
      ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap.n; j++) {
        for (i=0; i<mdn->A->rmap.n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly owned by one process, so take the local max
         row sum and reduce with MPI_MAX */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatTranspose_MPIDense"
899 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
900 {
901   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
902   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
903   Mat            B;
904   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
905   PetscErrorCode ierr;
906   PetscInt       j,i;
907   PetscScalar    *v;
908 
909   PetscFunctionBegin;
910   if (!matout && M != N) {
911     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
912   }
913   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
914   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
915   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
916   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
917 
918   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
919   ierr = PetscMalloc(n*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
920   for (j=0; j<n; j++) {
921     for (i=0; i<m; i++) rwork[i] = rstart + i;
922     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
923     v   += m;
924   }
925   ierr = PetscFree(rwork);CHKERRQ(ierr);
926   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
927   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
928   if (matout) {
929     *matout = B;
930   } else {
931     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 #include "petscblaslapack.h"
937 #undef __FUNCT__
938 #define __FUNCT__ "MatScale_MPIDense"
939 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
940 {
941   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
942   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
943   PetscScalar    oalpha = alpha;
944   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
945   PetscErrorCode ierr;
946 
947   PetscFunctionBegin;
948   BLASscal_(&nz,&oalpha,a->v,&one);
949   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
957 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
958 {
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
968 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
969 {
970   PetscErrorCode ierr;
971   PetscInt       m=A->rmap.n,n=B->cmap.n;
972   Mat            Cmat;
973 
974   PetscFunctionBegin;
975   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
976   ierr = MatCreate(B->comm,&Cmat);CHKERRQ(ierr);
977   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
978   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
979   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
980   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
982   *C = Cmat;
983   PetscFunctionReturn(0);
984 }
985 
/* -------------------------------------------------------------------*/
/*
   Operation table for the MATMPIDENSE type.  Entries are positional
   (the numbered markers give the slot index in struct _MatOps); a zero
   entry means the corresponding operation is not implemented for this
   matrix type.  Do not reorder.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
       MatLUFactorSymbolic_MPIDense,
       0,
       MatCholeskyFactorSymbolic_MPIDense,
       0,
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       MatMatMultSymbolic_MPIDense_MPIDense,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};
1086 
1087 EXTERN_C_BEGIN
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1090 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1091 {
1092   Mat_MPIDense   *a;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   mat->preallocated = PETSC_TRUE;
1097   /* Note:  For now, when data is specified above, this assumes the user correctly
1098    allocates the local dense storage space.  We should add error checking. */
1099 
1100   a    = (Mat_MPIDense*)mat->data;
1101   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1102   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1103   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1104   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1105   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 EXTERN_C_END
1109 
1110 /*MC
1111    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1112 
1113    Options Database Keys:
1114 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1115 
1116   Level: beginner
1117 
1118 .seealso: MatCreateMPIDense
1119 M*/
1120 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE.

   Installs the operation table, initializes the row/column maps and the
   off-process value stash, and composes the type-specific functions that
   other code looks up by the string names below (the names must match the
   queries made elsewhere exactly).  The local sequential block is NOT
   created here; that happens in MatMPIDenseSetPreallocation_MPIDense.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = 0;
  mat->mapping      = 0;

  a->factor       = 0;
  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);

  /* set up row/column ownership ranges (block size 1 for dense) */
  mat->rmap.bs = mat->cmap.bs = 1;
  ierr = PetscMapInitialize(mat->comm,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(mat->comm,&mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap.n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply; created lazily at assembly time */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                      MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                      MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                      MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1174 
1175 /*MC
1176    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1177 
1178    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1179    and MATMPIDENSE otherwise.
1180 
1181    Options Database Keys:
1182 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1183 
1184   Level: beginner
1185 
1186 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1187 M*/
1188 
1189 EXTERN_C_BEGIN
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatCreate_Dense"
1192 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1193 {
1194   PetscErrorCode ierr;
1195   PetscMPIInt    size;
1196 
1197   PetscFunctionBegin;
1198   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1199   if (size == 1) {
1200     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1201   } else {
1202     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 EXTERN_C_END
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1210 /*@C
1211    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1212 
1213    Not collective
1214 
1215    Input Parameters:
1216 .  A - the matrix
1217 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1218    to control all matrix memory allocation.
1219 
1220    Notes:
1221    The dense format is fully compatible with standard Fortran 77
1222    storage by columns.
1223 
1224    The data input variable is intended primarily for Fortran programmers
1225    who wish to allocate their own matrix memory space.  Most users should
1226    set data=PETSC_NULL.
1227 
1228    Level: intermediate
1229 
1230 .keywords: matrix,dense, parallel
1231 
1232 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1233 @*/
1234 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1235 {
1236   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1237 
1238   PetscFunctionBegin;
1239   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1240   if (f) {
1241     ierr = (*f)(mat,data);CHKERRQ(ierr);
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatCreateMPIDense"
1248 /*@C
1249    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1250 
1251    Collective on MPI_Comm
1252 
1253    Input Parameters:
1254 +  comm - MPI communicator
1255 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1256 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1257 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1258 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1259 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1260    to control all matrix memory allocation.
1261 
1262    Output Parameter:
1263 .  A - the matrix
1264 
1265    Notes:
1266    The dense format is fully compatible with standard Fortran 77
1267    storage by columns.
1268 
1269    The data input variable is intended primarily for Fortran programmers
1270    who wish to allocate their own matrix memory space.  Most users should
1271    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1272 
1273    The user MUST specify either the local or global matrix dimensions
1274    (possibly both).
1275 
1276    Level: intermediate
1277 
1278 .keywords: matrix,dense, parallel
1279 
1280 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1281 @*/
1282 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1283 {
1284   PetscErrorCode ierr;
1285   PetscMPIInt    size;
1286 
1287   PetscFunctionBegin;
1288   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1289   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1290   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1291   if (size > 1) {
1292     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1293     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1294   } else {
1295     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1296     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
/*
   MatDuplicate_MPIDense - Creates a new matrix with the same layout as A,
   duplicating the local sequential block (with or without values
   according to cpvalues).

   The header fields and ownership ranges are copied explicitly rather
   than calling the preallocation path, so the field-by-field order below
   matters; a->A is created last via MatDuplicate of the old local block.
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;   /* duplicate is born assembled */
  mat->preallocated = PETSC_TRUE;

  mat->rmap.rstart     = A->rmap.rstart;
  mat->rmap.rend       = A->rmap.rend;
  a->size              = oldmat->size;
  a->rank              = oldmat->rank;
  mat->insertmode      = NOT_SET_VALUES;
  a->nvec              = oldmat->nvec;
  a->donotstash        = oldmat->donotstash;

  /* copy the ownership-range arrays (size+1 entries each) */
  ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);

  /* set up the scatter context for MatMult, then clone the local block */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
1338 
1339 #include "petscsys.h"
1340 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
/*
   MatLoad_MPIDense_DenseInFile - Loads a matrix stored on disk in dense
   (column-compatible, row-by-row) binary format.

   Rank 0 reads the whole file: first its own rows, then each other
   rank's chunk, which it ships with MPI_Send; every other rank receives
   its chunk with a matching MPI_Recv.  The file holds values row by row,
   while the local array is column-major, hence the transposing copy
   loops below.

   NOTE(review): fd is only read on rank 0; other ranks never touch it.
*/
PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       *rowners,i,m,nz,j;
  PetscScalar    *array,*vals,*vals_ptr;
  MPI_Status     status;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* determine ownership of all rows: even split, remainder to low ranks */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];   /* prefix sum -> global row starts */
  }

  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;   /* row-major file -> column-major array */
      }
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = (rowners[i+1] - rowners[i])*N;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
    }
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);

    /* insert into matrix-by row (this is why cannot directly read into array */
    vals_ptr = vals;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = *vals_ptr++;
      }
    }
  }
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatLoad_MPIDense"
1414 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1415 {
1416   Mat            A;
1417   PetscScalar    *vals,*svals;
1418   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1419   MPI_Status     status;
1420   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1421   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1422   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1423   PetscInt       i,nz,j,rstart,rend;
1424   int            fd;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1429   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1430   if (!rank) {
1431     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1432     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1433     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1434   }
1435 
1436   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1437   M = header[1]; N = header[2]; nz = header[3];
1438 
1439   /*
1440        Handle case where matrix is stored on disk as a dense matrix
1441   */
1442   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1443     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1444     PetscFunctionReturn(0);
1445   }
1446 
1447   /* determine ownership of all rows */
1448   m          = M/size + ((M % size) > rank);
1449   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1450   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1451   rowners[0] = 0;
1452   for (i=2; i<=size; i++) {
1453     rowners[i] += rowners[i-1];
1454   }
1455   rstart = rowners[rank];
1456   rend   = rowners[rank+1];
1457 
1458   /* distribute row lengths to all processors */
1459   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1460   offlens = ourlens + (rend-rstart);
1461   if (!rank) {
1462     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1463     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1464     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1465     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1466     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1467     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1468   } else {
1469     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1470   }
1471 
1472   if (!rank) {
1473     /* calculate the number of nonzeros on each processor */
1474     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1475     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1476     for (i=0; i<size; i++) {
1477       for (j=rowners[i]; j< rowners[i+1]; j++) {
1478         procsnz[i] += rowlengths[j];
1479       }
1480     }
1481     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1482 
1483     /* determine max buffer needed and allocate it */
1484     maxnz = 0;
1485     for (i=0; i<size; i++) {
1486       maxnz = PetscMax(maxnz,procsnz[i]);
1487     }
1488     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1489 
1490     /* read in my part of the matrix column indices  */
1491     nz = procsnz[0];
1492     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1493     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1494 
1495     /* read in every one elses and ship off */
1496     for (i=1; i<size; i++) {
1497       nz   = procsnz[i];
1498       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1499       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1500     }
1501     ierr = PetscFree(cols);CHKERRQ(ierr);
1502   } else {
1503     /* determine buffer space needed for message */
1504     nz = 0;
1505     for (i=0; i<m; i++) {
1506       nz += ourlens[i];
1507     }
1508     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1509 
1510     /* receive message of column indices*/
1511     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1512     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1513     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1514   }
1515 
1516   /* loop over local rows, determining number of off diagonal entries */
1517   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1518   jj = 0;
1519   for (i=0; i<m; i++) {
1520     for (j=0; j<ourlens[i]; j++) {
1521       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1522       jj++;
1523     }
1524   }
1525 
1526   /* create our matrix */
1527   for (i=0; i<m; i++) {
1528     ourlens[i] -= offlens[i];
1529   }
1530   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1531   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1532   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1533   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1534   A = *newmat;
1535   for (i=0; i<m; i++) {
1536     ourlens[i] += offlens[i];
1537   }
1538 
1539   if (!rank) {
1540     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1541 
1542     /* read in my part of the matrix numerical values  */
1543     nz = procsnz[0];
1544     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1545 
1546     /* insert into matrix */
1547     jj      = rstart;
1548     smycols = mycols;
1549     svals   = vals;
1550     for (i=0; i<m; i++) {
1551       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1552       smycols += ourlens[i];
1553       svals   += ourlens[i];
1554       jj++;
1555     }
1556 
1557     /* read in other processors and ship out */
1558     for (i=1; i<size; i++) {
1559       nz   = procsnz[i];
1560       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1561       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1562     }
1563     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1564   } else {
1565     /* receive numeric values */
1566     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1567 
1568     /* receive message of values*/
1569     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1570     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1571     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1572 
1573     /* insert into matrix */
1574     jj      = rstart;
1575     smycols = mycols;
1576     svals   = vals;
1577     for (i=0; i<m; i++) {
1578       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1579       smycols += ourlens[i];
1580       svals   += ourlens[i];
1581       jj++;
1582     }
1583   }
1584   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1585   ierr = PetscFree(vals);CHKERRQ(ierr);
1586   ierr = PetscFree(mycols);CHKERRQ(ierr);
1587   ierr = PetscFree(rowners);CHKERRQ(ierr);
1588 
1589   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1590   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatEqual_MPIDense"
1596 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1597 {
1598   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1599   Mat            a,b;
1600   PetscTruth     flg;
1601   PetscErrorCode ierr;
1602 
1603   PetscFunctionBegin;
1604   a = matA->A;
1605   b = matB->A;
1606   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 
1611 
1612