xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2a6744eb01855f5aa328eb8fdf4b0d01e72ad151)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix
36               that stores the local portion of the operator; a sequential dense matrix is returned as itself.
37 
38     Input Parameter:
39 .      A - the sequential or MPI dense matrix
40 
41     Output Parameter:
42 .      B - the inner (local) sequential dense matrix
43 
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56   if (!flg) {  /* this type check is imprecise: a MATDENSE matrix on one process is really MATSEQDENSE */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
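
/*
   A typical use, sketched under the assumption that A is a parallel MATMPIDENSE
   matrix (the variable names here are illustrative only); the array obtained is
   the column-major local storage:

     Mat         Aloc;
     PetscScalar *v;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
     ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
*/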
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
97 
98 EXTERN_C_BEGIN
99 #undef __FUNCT__
100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102 {
103   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
106   PetscScalar    *array;
107   MPI_Comm       comm;
108 
109   PetscFunctionBegin;
110   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
111 
112   /* The reuse aspect is not implemented efficiently */
113   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
114 
115   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
116   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
117   ierr = MatCreate(comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
119   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
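  /* the diagonal block occupies columns rstart..rstart+m-1 of the local array; with
     column-major storage its first entry is at offset m*rstart and the block is contiguous */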
120   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
121   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
122   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 
125   *iscopy = PETSC_TRUE;
126   PetscFunctionReturn(0);
127 }
128 EXTERN_C_END
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetValues_MPIDense"
132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133 {
134   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
135   PetscErrorCode ierr;
136   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137   PetscTruth     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
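  /* v is logically m by n: with row-oriented storage entry (i,j) lives at v[i*n+j],
     with column-oriented storage at v[i+j*m], matching the offsets used below */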
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue;
142     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
147       } else {
148         for (j=0; j<n; j++) {
149           if (idxn[j] < 0) continue;
150           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
152         }
153       }
154     } else {
155       if (!A->donotstash) {
156         if (roworiented) {
157           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
158         } else {
159           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
160         }
161       }
162     }
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
178     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
183         if (idxn[j] >= mat->cmap.N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210 {
211   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
213   PetscErrorCode ierr;
214   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215   PetscScalar    *av,*bv,*v = lmat->v;
216   Mat            newmat;
217 
218   PetscFunctionBegin;
219   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
220   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
221   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
222   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
223 
224   /* No parallel redistribution is currently supported! We should really check each index set
225      to confirm that it is OK; currently the submatrix must use the same row partitioning as
226      the original matrix. */
227 
228   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
230 
231   /* Check submatrix call */
232   if (scall == MAT_REUSE_MATRIX) {
233     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234     /* Really need to test rows and column sizes! */
235     newmat = *B;
236   } else {
237     /* Create and fill new matrix */
238     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
239     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
240     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
241     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
242   }
243 
244   /* Now extract the data pointers and do the copy, column at a time */
245   newmatd = (Mat_MPIDense*)newmat->data;
246   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247 
248   for (i=0; i<ncols; i++) {
249     av = v + nlrows*icol[i];
250     for (j=0; j<nrows; j++) {
251       *bv++ = av[irow[j] - rstart];
252     }
253   }
254 
255   /* Assemble the matrices so that the correct flags are set */
256   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258 
259   /* Free work space */
260   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
261   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
262   *B = newmat;
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279   MPI_Comm       comm = mat->comm;
280   PetscErrorCode ierr;
281   PetscInt       nstash,reallocs;
282   InsertMode     addv;
283 
284   PetscFunctionBegin;
285   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
286   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
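  /* NOT_SET_VALUES is 0, so processors that made no insertions do not change the result of
     the bitwise OR; a mix of ADD_VALUES and INSERT_VALUES sets both bits and is caught below */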
287   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289   }
290   mat->insertmode = addv; /* in case this processor had no cache */
291 
292   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303   PetscErrorCode  ierr;
304   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305   PetscMPIInt     n;
306   PetscScalar     *val;
307   InsertMode      addv=mat->insertmode;
308 
309   PetscFunctionBegin;
310   /*  wait on receives */
311   while (1) {
312     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
313     if (!flg) break;
314 
315     for (i=0; i<n;) {
316       /* Now identify the consecutive vals belonging to the same row */
317       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318       if (j < n) ncols = j-i;
319       else       ncols = n-i;
320       /* Now assemble all these values with a single function call */
321       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
322       i = j;
323     }
324   }
325   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326 
327   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* the code does not handle the diagonal entries correctly unless the
349    matrix is square and the column and row ownerships are identical.
350    This is a BUG. The only way to fix it seems to be to access
351    the local matrix mdn->A directly rather than going through the
352    MatZeroRows() routine.
353 */
354 #undef __FUNCT__
355 #define __FUNCT__ "MatZeroRows_MPIDense"
356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357 {
358   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
359   PetscErrorCode ierr;
360   PetscInt       i,*owners = A->rmap.range;
361   PetscInt       *nprocs,j,idx,nsends;
362   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
364   PetscInt       *lens,*lrows,*values;
365   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366   MPI_Comm       comm = A->comm;
367   MPI_Request    *send_waits,*recv_waits;
368   MPI_Status     recv_status,*send_status;
369   PetscTruth     found;
370 
371   PetscFunctionBegin;
372   /*  first count number of contributors to each processor */
373   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
374   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
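  /* nprocs is used in pairs: nprocs[2*j] counts the rows destined for processor j,
     while nprocs[2*j+1] is a 0/1 flag recording whether any message to j is needed */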
375   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
376   for (i=0; i<N; i++) {
377     idx = rows[i];
378     found = PETSC_FALSE;
379     for (j=0; j<size; j++) {
380       if (idx >= owners[j] && idx < owners[j+1]) {
381         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382       }
383     }
384     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385   }
386   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
387 
388   /* inform other processors of number of messages and max length*/
389   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
390 
391   /* post receives:   */
392   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
394   for (i=0; i<nrecvs; i++) {
395     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
396   }
397 
398   /* do sends:
399       1) starts[i] gives the starting index in svalues for stuff going to
400          the ith processor
401   */
402   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
403   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
404   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
405   starts[0]  = 0;
406   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   for (i=0; i<N; i++) {
408     svalues[starts[owner[i]]++] = rows[i];
409   }
410 
411   starts[0] = 0;
412   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413   count = 0;
414   for (i=0; i<size; i++) {
415     if (nprocs[2*i+1]) {
416       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
417     }
418   }
419   ierr = PetscFree(starts);CHKERRQ(ierr);
420 
421   base = owners[rank];
422 
423   /*  wait on receives */
424   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
425   source = lens + nrecvs;
426   count  = nrecvs; slen = 0;
427   while (count) {
428     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
429     /* unpack receives into our local space */
430     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
431     source[imdex]  = recv_status.MPI_SOURCE;
432     lens[imdex]    = n;
433     slen += n;
434     count--;
435   }
436   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
437 
438   /* move the data into the send scatter */
439   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
440   count = 0;
441   for (i=0; i<nrecvs; i++) {
442     values = rvalues + i*nmax;
443     for (j=0; j<lens[i]; j++) {
444       lrows[count++] = values[j] - base;
445     }
446   }
447   ierr = PetscFree(rvalues);CHKERRQ(ierr);
448   ierr = PetscFree(lens);CHKERRQ(ierr);
449   ierr = PetscFree(owner);CHKERRQ(ierr);
450   ierr = PetscFree(nprocs);CHKERRQ(ierr);
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
477   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
491   ierr = VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap.n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
543   radd = A->rmap.rstart*m;
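  /* global diagonal entry rstart+i is local row i, global column rstart+i; with
     column-major storage that is offset (rstart+i)*m + i = radd + i*m + i */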
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 
558   PetscFunctionBegin;
559 
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
565   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
566   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
567   if (mdn->factor) {
568     ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
569     ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
570     ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
571     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(mdn);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_MPIDense_Binary"
584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585 {
586   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
587   PetscErrorCode ierr;
588 
589   PetscFunctionBegin;
590   if (mdn->size == 1) {
591     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
592   }
593   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
599 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
600 {
601   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
602   PetscErrorCode    ierr;
603   PetscMPIInt       size = mdn->size,rank = mdn->rank;
604   PetscViewerType   vtype;
605   PetscTruth        iascii,isdraw;
606   PetscViewer       sviewer;
607   PetscViewerFormat format;
608 
609   PetscFunctionBegin;
610   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
611   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
612   if (iascii) {
613     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
614     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
615     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
616       MatInfo info;
617       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
618       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
619                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
620       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
621       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
622       PetscFunctionReturn(0);
623     } else if (format == PETSC_VIEWER_ASCII_INFO) {
624       PetscFunctionReturn(0);
625     }
626   } else if (isdraw) {
627     PetscDraw  draw;
628     PetscTruth isnull;
629 
630     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
631     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
632     if (isnull) PetscFunctionReturn(0);
633   }
634 
635   if (size == 1) {
636     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
637   } else {
638     /* assemble the entire matrix onto the first processor. */
639     Mat         A;
640     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
641     PetscInt    *cols;
642     PetscScalar *vals;
643 
644     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
645     if (!rank) {
646       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
647     } else {
648       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
649     }
650     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
651     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
652     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
653     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
654 
655     /* Copy the matrix ... This isn't the most efficient means,
656        but it's quick for now */
657     A->insertmode = INSERT_VALUES;
658     row = mat->rmap.rstart; m = mdn->A->rmap.n;
659     for (i=0; i<m; i++) {
660       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
661       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
662       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
663       row++;
664     }
665 
666     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
667     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
668     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
669     if (!rank) {
670       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
671     }
672     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
673     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
674     ierr = MatDestroy(A);CHKERRQ(ierr);
675   }
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatView_MPIDense"
681 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
682 {
683   PetscErrorCode ierr;
684   PetscTruth     iascii,isbinary,isdraw,issocket;
685 
686   PetscFunctionBegin;
687 
688   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
689   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
690   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
691   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
692 
693   if (iascii || issocket || isdraw) {
694     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
695   } else if (isbinary) {
696     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
697   } else {
698     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatGetInfo_MPIDense"
705 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
706 {
707   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
708   Mat            mdn = mat->A;
709   PetscErrorCode ierr;
710   PetscReal      isend[5],irecv[5];
711 
712   PetscFunctionBegin;
713   info->rows_global    = (double)A->rmap.N;
714   info->columns_global = (double)A->cmap.N;
715   info->rows_local     = (double)A->rmap.n;
716   info->columns_local  = (double)A->cmap.N;
717   info->block_size     = 1.0;
718   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
719   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
720   isend[3] = info->memory;  isend[4] = info->mallocs;
721   if (flag == MAT_LOCAL) {
722     info->nz_used      = isend[0];
723     info->nz_allocated = isend[1];
724     info->nz_unneeded  = isend[2];
725     info->memory       = isend[3];
726     info->mallocs      = isend[4];
727   } else if (flag == MAT_GLOBAL_MAX) {
728     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
729     info->nz_used      = irecv[0];
730     info->nz_allocated = irecv[1];
731     info->nz_unneeded  = irecv[2];
732     info->memory       = irecv[3];
733     info->mallocs      = irecv[4];
734   } else if (flag == MAT_GLOBAL_SUM) {
735     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
736     info->nz_used      = irecv[0];
737     info->nz_allocated = irecv[1];
738     info->nz_unneeded  = irecv[2];
739     info->memory       = irecv[3];
740     info->mallocs      = irecv[4];
741   }
742   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
743   info->fill_ratio_needed = 0;
744   info->factor_mallocs    = 0;
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatSetOption_MPIDense"
750 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
751 {
752   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756   switch (op) {
757   case MAT_NO_NEW_NONZERO_LOCATIONS:
758   case MAT_YES_NEW_NONZERO_LOCATIONS:
759   case MAT_NEW_NONZERO_LOCATION_ERR:
760   case MAT_NEW_NONZERO_ALLOCATION_ERR:
761   case MAT_COLUMNS_SORTED:
762   case MAT_COLUMNS_UNSORTED:
763     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
764     break;
765   case MAT_ROW_ORIENTED:
766     a->roworiented = PETSC_TRUE;
767     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
768     break;
769   case MAT_ROWS_SORTED:
770   case MAT_ROWS_UNSORTED:
771   case MAT_YES_NEW_DIAGONALS:
772   case MAT_USE_HASH_TABLE:
773     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
774     break;
775   case MAT_COLUMN_ORIENTED:
776     a->roworiented = PETSC_FALSE;
777     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
778     break;
779   case MAT_IGNORE_OFF_PROC_ENTRIES:
780     a->donotstash = PETSC_TRUE;
781     break;
782   case MAT_NO_NEW_DIAGONALS:
783     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
784   case MAT_SYMMETRIC:
785   case MAT_STRUCTURALLY_SYMMETRIC:
786   case MAT_NOT_SYMMETRIC:
787   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
788   case MAT_HERMITIAN:
789   case MAT_NOT_HERMITIAN:
790   case MAT_SYMMETRY_ETERNAL:
791   case MAT_NOT_SYMMETRY_ETERNAL:
792     break;
793   default:
794     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatDiagonalScale_MPIDense"
802 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
803 {
804   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
805   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
806   PetscScalar    *l,*r,x,*v;
807   PetscErrorCode ierr;
808   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
809 
810   PetscFunctionBegin;
811   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
812   if (ll) {
813     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
814     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
815     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
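    /* the local array is stored column-major, so successive entries of row i are m apart */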
816     for (i=0; i<m; i++) {
817       x = l[i];
818       v = mat->v + i;
819       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
820     }
821     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
822     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
823   }
824   if (rr) {
825     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
826     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
827     ierr = VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
828     ierr = VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);CHKERRQ(ierr);
829     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
830     for (i=0; i<n; i++) {
831       x = r[i];
832       v = mat->v + i*m;
833       for (j=0; j<m; j++) { (*v++) *= x;}
834     }
835     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
836     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 #undef __FUNCT__
842 #define __FUNCT__ "MatNorm_MPIDense"
843 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
844 {
845   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
846   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
847   PetscErrorCode ierr;
848   PetscInt       i,j;
849   PetscReal      sum = 0.0;
850   PetscScalar    *v = mat->v;
851 
852   PetscFunctionBegin;
853   if (mdn->size == 1) {
854     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
855   } else {
856     if (type == NORM_FROBENIUS) {
857       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
858 #if defined(PETSC_USE_COMPLEX)
859         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
860 #else
861         sum += (*v)*(*v); v++;
862 #endif
863       }
864       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
865       *nrm = sqrt(*nrm);
866       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
867     } else if (type == NORM_1) {
868       PetscReal *tmp,*tmp2;
869       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
870       tmp2 = tmp + A->cmap.N;
871       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
872       *nrm = 0.0;
873       v = mat->v;
874       for (j=0; j<mdn->A->cmap.n; j++) {
875         for (i=0; i<mdn->A->rmap.n; i++) {
876           tmp[j] += PetscAbsScalar(*v);  v++;
877         }
878       }
879       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
880       for (j=0; j<A->cmap.N; j++) {
881         if (tmp2[j] > *nrm) *nrm = tmp2[j];
882       }
883       ierr = PetscFree(tmp);CHKERRQ(ierr);
884       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
885     } else if (type == NORM_INFINITY) { /* max row norm */
886       PetscReal ntemp;
887       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
888       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
889     } else {
890       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
891     }
892   }
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatTranspose_MPIDense"
898 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
899 {
900   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
901   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
902   Mat            B;
903   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
904   PetscErrorCode ierr;
905   PetscInt       j,i;
906   PetscScalar    *v;
907 
908   PetscFunctionBegin;
909   if (!matout && M != N) {
910     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
911   }
912   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
913   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
914   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
915   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
916 
917   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
918   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
919   for (i=0; i<m; i++) rwork[i] = rstart + i;
920   for (j=0; j<n; j++) {
921     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
922     v   += m;
923   }
924   ierr = PetscFree(rwork);CHKERRQ(ierr);
925   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
926   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
927   if (matout) {
928     *matout = B;
929   } else {
930     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 #include "petscblaslapack.h"
936 #undef __FUNCT__
937 #define __FUNCT__ "MatScale_MPIDense"
938 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
939 {
940   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
941   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
942   PetscScalar    oalpha = alpha;
943   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
944   PetscErrorCode ierr;
945 
946   PetscFunctionBegin;
947   BLASscal_(&nz,&oalpha,a->v,&one);
948   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
956 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
957 {
958   PetscErrorCode ierr;
959 
960   PetscFunctionBegin;
961   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
967 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
968 {
969   PetscErrorCode ierr;
970   PetscInt       m=A->rmap.n,n=B->cmap.n;
971   Mat            Cmat;
972 
973   PetscFunctionBegin;
974   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
975   ierr = MatCreate(B->comm,&Cmat);CHKERRQ(ierr);
976   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
977   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
978   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
979   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981   *C = Cmat;
982   PetscFunctionReturn(0);
983 }
984 
985 /* -------------------------------------------------------------------*/
986 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
987        MatGetRow_MPIDense,
988        MatRestoreRow_MPIDense,
989        MatMult_MPIDense,
990 /* 4*/ MatMultAdd_MPIDense,
991        MatMultTranspose_MPIDense,
992        MatMultTransposeAdd_MPIDense,
993        0,
994        0,
995        0,
996 /*10*/ 0,
997        0,
998        0,
999        0,
1000        MatTranspose_MPIDense,
1001 /*15*/ MatGetInfo_MPIDense,
1002        MatEqual_MPIDense,
1003        MatGetDiagonal_MPIDense,
1004        MatDiagonalScale_MPIDense,
1005        MatNorm_MPIDense,
1006 /*20*/ MatAssemblyBegin_MPIDense,
1007        MatAssemblyEnd_MPIDense,
1008        0,
1009        MatSetOption_MPIDense,
1010        MatZeroEntries_MPIDense,
1011 /*25*/ MatZeroRows_MPIDense,
1012        MatLUFactorSymbolic_MPIDense,
1013        0,
1014        MatCholeskyFactorSymbolic_MPIDense,
1015        0,
1016 /*30*/ MatSetUpPreallocation_MPIDense,
1017        0,
1018        0,
1019        MatGetArray_MPIDense,
1020        MatRestoreArray_MPIDense,
1021 /*35*/ MatDuplicate_MPIDense,
1022        0,
1023        0,
1024        0,
1025        0,
1026 /*40*/ 0,
1027        MatGetSubMatrices_MPIDense,
1028        0,
1029        MatGetValues_MPIDense,
1030        0,
1031 /*45*/ 0,
1032        MatScale_MPIDense,
1033        0,
1034        0,
1035        0,
1036 /*50*/ 0,
1037        0,
1038        0,
1039        0,
1040        0,
1041 /*55*/ 0,
1042        0,
1043        0,
1044        0,
1045        0,
1046 /*60*/ MatGetSubMatrix_MPIDense,
1047        MatDestroy_MPIDense,
1048        MatView_MPIDense,
1049        0,
1050        0,
1051 /*65*/ 0,
1052        0,
1053        0,
1054        0,
1055        0,
1056 /*70*/ 0,
1057        0,
1058        0,
1059        0,
1060        0,
1061 /*75*/ 0,
1062        0,
1063        0,
1064        0,
1065        0,
1066 /*80*/ 0,
1067        0,
1068        0,
1069        0,
1070 /*84*/ MatLoad_MPIDense,
1071        0,
1072        0,
1073        0,
1074        0,
1075        0,
1076 /*90*/ 0,
1077        MatMatMultSymbolic_MPIDense_MPIDense,
1078        0,
1079        0,
1080        0,
1081 /*95*/ 0,
1082        0,
1083        0,
1084        0};
1085 
1086 EXTERN_C_BEGIN
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1089 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1090 {
1091   Mat_MPIDense   *a;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   mat->preallocated = PETSC_TRUE;
1096   /* Note:  For now, when data is supplied, this assumes the user has correctly
1097    allocated the local dense storage space.  We should add error checking. */
1098 
1099   a    = (Mat_MPIDense*)mat->data;
1100   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1101   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1102   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1103   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1104   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 EXTERN_C_END
1108 
1109 /*MC
1110    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1111 
1112    Options Database Keys:
1113 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1114 
1115   Level: beginner
1116 
1117 .seealso: MatCreateMPIDense
1118 M*/
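
/*
   A minimal creation sketch for this type (the communicator and the sizes M,N are
   illustrative; the calls themselves are the ones used throughout this file):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/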
1119 
1120 EXTERN_C_BEGIN
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatCreate_MPIDense"
1123 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1124 {
1125   Mat_MPIDense   *a;
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   ierr              = PetscNew(Mat_MPIDense,&a);CHKERRQ(ierr);
1130   mat->data         = (void*)a;
1131   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1132   mat->factor       = 0;
1133   mat->mapping      = 0;
1134 
1135   a->factor       = 0;
1136   mat->insertmode = NOT_SET_VALUES;
1137   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1138   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1139 
1140   mat->rmap.bs = mat->cmap.bs = 1;
1141   ierr = PetscMapInitialize(mat->comm,&mat->rmap);CHKERRQ(ierr);
1142   ierr = PetscMapInitialize(mat->comm,&mat->cmap);CHKERRQ(ierr);
1143   a->nvec = mat->cmap.n;
1144 
1145   /* build cache for off array entries formed */
1146   a->donotstash = PETSC_FALSE;
1147   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1148 
1149   /* stuff used for matrix vector multiply */
1150   a->lvec        = 0;
1151   a->Mvctx       = 0;
1152   a->roworiented = PETSC_TRUE;
1153 
1154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1155                                      "MatGetDiagonalBlock_MPIDense",
1156                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1157   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1158                                      "MatMPIDenseSetPreallocation_MPIDense",
1159                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1160   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1161                                      "MatMatMult_MPIAIJ_MPIDense",
1162                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1164                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1165                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1167                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1168                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 EXTERN_C_END
1172 
1173 /*MC
1174    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1175 
1176    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1177    and MATMPIDENSE otherwise.
1178 
1179    Options Database Keys:
1180 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1181 
1182   Level: beginner
1183 
1184 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1185 M*/
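
/*
   Because MATDENSE dispatches on the communicator size (see MatCreate_Dense below),
   the same code runs sequentially and in parallel; a sketch:

     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);

   On one process this resolves to MATSEQDENSE, otherwise to MATMPIDENSE.
*/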
1186 
1187 EXTERN_C_BEGIN
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatCreate_Dense"
1190 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1191 {
1192   PetscErrorCode ierr;
1193   PetscMPIInt    size;
1194 
1195   PetscFunctionBegin;
1196   ierr = PetscObjectChangeTypeName((PetscObject)A,MATDENSE);CHKERRQ(ierr);
1197   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1198   if (size == 1) {
1199     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1200   } else {
1201     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1202   }
1203   PetscFunctionReturn(0);
1204 }
1205 EXTERN_C_END
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1209 /*@C
1210    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1211 
1212    Not collective
1213 
1214    Input Parameters:
1215 +  A - the matrix
1216 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1217    to control all matrix memory allocation.
1218 
1219    Notes:
1220    The dense format is fully compatible with standard Fortran 77
1221    storage by columns.
1222 
1223    The data input variable is intended primarily for Fortran programmers
1224    who wish to allocate their own matrix memory space.  Most users should
1225    set data=PETSC_NULL.
1226 
1227    Level: intermediate
1228 
1229 .keywords: matrix,dense, parallel
1230 
1231 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1232 @*/
1233 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1234 {
1235   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1236 
1237   PetscFunctionBegin;
1238   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1239   if (f) {
1240     ierr = (*f)(mat,data);CHKERRQ(ierr);
1241   }
1242   PetscFunctionReturn(0);
1243 }
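
/*
   A small usage sketch, assuming A already has its sizes and the MATMPIDENSE type
   (the array name mydata is illustrative only):

     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   lets PETSc allocate the storage, while callers providing their own column-major
   local array would use

     ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/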
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatCreateMPIDense"
1247 /*@C
1248    MatCreateMPIDense - Creates a parallel matrix in dense format.
1249 
1250    Collective on MPI_Comm
1251 
1252    Input Parameters:
1253 +  comm - MPI communicator
1254 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1255 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1256 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1257 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1258 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1259    to control all matrix memory allocation.
1260 
1261    Output Parameter:
1262 .  A - the matrix
1263 
1264    Notes:
1265    The dense format is fully compatible with standard Fortran 77
1266    storage by columns.
1267 
1268    The data input variable is intended primarily for Fortran programmers
1269    who wish to allocate their own matrix memory space.  Most users should
1270    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1271 
1272    The user MUST specify either the local or global matrix dimensions
1273    (possibly both).
1274 
1275    Level: intermediate
1276 
1277 .keywords: matrix,dense, parallel
1278 
1279 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1280 @*/
1281 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1282 {
1283   PetscErrorCode ierr;
1284   PetscMPIInt    size;
1285 
1286   PetscFunctionBegin;
1287   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1288   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1289   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1290   if (size > 1) {
1291     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1292     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1293   } else {
1294     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1295     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1296   }
1297   PetscFunctionReturn(0);
1298 }
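
/*
   A minimal end-to-end sketch (the global size 100 is illustrative):

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     (fill with MatSetValues(), then assemble)
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/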
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatDuplicate_MPIDense"
1302 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1303 {
1304   Mat            mat;
1305   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1306   PetscErrorCode ierr;
1307 
1308   PetscFunctionBegin;
1309   *newmat       = 0;
1310   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1311   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1312   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1313   a                 = (Mat_MPIDense*)mat->data;
1314   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1315   mat->factor       = A->factor;
1316   mat->assembled    = PETSC_TRUE;
1317   mat->preallocated = PETSC_TRUE;
1318 
1319   mat->rmap.rstart     = A->rmap.rstart;
1320   mat->rmap.rend       = A->rmap.rend;
1321   a->size              = oldmat->size;
1322   a->rank              = oldmat->rank;
1323   mat->insertmode      = NOT_SET_VALUES;
1324   a->nvec              = oldmat->nvec;
1325   a->donotstash        = oldmat->donotstash;
1326 
1327   ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->rmap.range);CHKERRQ(ierr);
1328   ierr = PetscMalloc((a->size+1)*sizeof(PetscInt),&mat->cmap.range);CHKERRQ(ierr);
1329   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1330   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1331   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1332 
1333   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1334   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1335   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1336   *newmat = mat;
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #include "petscsys.h"
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1344 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1345 {
1346   PetscErrorCode ierr;
1347   PetscMPIInt    rank,size;
1348   PetscInt       *rowners,i,m,nz,j;
1349   PetscScalar    *array,*vals,*vals_ptr;
1350   MPI_Status     status;
1351 
1352   PetscFunctionBegin;
1353   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1354   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1355 
1356   /* determine ownership of all rows */
1357   m          = M/size + ((M % size) > rank);
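  /* e.g. M=10 on size=3 gives ranks 0,1,2 m=4,3,3: the first M%size ranks take one extra row */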
1358   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1359   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1360   rowners[0] = 0;
1361   for (i=2; i<=size; i++) {
1362     rowners[i] += rowners[i-1];
1363   }
1364 
1365   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1366   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1367   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1368   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1369   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1370 
1371   if (!rank) {
1372     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1373 
1374     /* read in my part of the matrix numerical values  */
1375     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1376 
1377     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1378     vals_ptr = vals;
1379     for (i=0; i<m; i++) {
1380       for (j=0; j<N; j++) {
1381         array[i + j*m] = *vals_ptr++;
1382       }
1383     }
1384 
1385     /* read in the other processors' parts and ship them out */
1386     for (i=1; i<size; i++) {
1387       nz   = (rowners[i+1] - rowners[i])*N;
1388       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1389       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1390     }
1391   } else {
1392     /* receive numeric values */
1393     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1394 
1395     /* receive message of values */
1396     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1397 
1398     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1399     vals_ptr = vals;
1400     for (i=0; i<m; i++) {
1401       for (j=0; j<N; j++) {
1402         array[i + j*m] = *vals_ptr++;
1403       }
1404     }
1405   }
1406   ierr = PetscFree(rowners);CHKERRQ(ierr);
1407   ierr = PetscFree(vals);CHKERRQ(ierr);
1408   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatLoad_MPIDense"
1415 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1416 {
1417   Mat            A;
1418   PetscScalar    *vals,*svals;
1419   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1420   MPI_Status     status;
1421   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1422   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1423   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1424   PetscInt       i,nz,j,rstart,rend;
1425   int            fd;
1426   PetscErrorCode ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1430   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1431   if (!rank) {
1432     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1433     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1434     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
1435   }
1436 
1437   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1438   M = header[1]; N = header[2]; nz = header[3];
1439 
1440   /*
1441        Handle case where matrix is stored on disk as a dense matrix
1442   */
1443   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1444     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1445     PetscFunctionReturn(0);
1446   }
1447 
1448   /* determine ownership of all rows */
1449   m          = M/size + ((M % size) > rank);
1450   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1451   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1452   rowners[0] = 0;
1453   for (i=2; i<=size; i++) {
1454     rowners[i] += rowners[i-1];
1455   }
1456   rstart = rowners[rank];
1457   rend   = rowners[rank+1];
1458 
1459   /* distribute row lengths to all processors */
1460   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1461   offlens = ourlens + (rend-rstart);
1462   if (!rank) {
1463     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1464     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1465     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1466     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1467     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1468     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1469   } else {
1470     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1471   }
1472 
1473   if (!rank) {
1474     /* calculate the number of nonzeros on each processor */
1475     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1476     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1477     for (i=0; i<size; i++) {
1478       for (j=rowners[i]; j< rowners[i+1]; j++) {
1479         procsnz[i] += rowlengths[j];
1480       }
1481     }
1482     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1483 
1484     /* determine max buffer needed and allocate it */
1485     maxnz = 0;
1486     for (i=0; i<size; i++) {
1487       maxnz = PetscMax(maxnz,procsnz[i]);
1488     }
1489     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1490 
1491     /* read in my part of the matrix column indices  */
1492     nz = procsnz[0];
1493     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1494     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1495 
1496     /* read in everyone else's part and ship it off */
1497     for (i=1; i<size; i++) {
1498       nz   = procsnz[i];
1499       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1500       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1501     }
1502     ierr = PetscFree(cols);CHKERRQ(ierr);
1503   } else {
1504     /* determine buffer space needed for message */
1505     nz = 0;
1506     for (i=0; i<m; i++) {
1507       nz += ourlens[i];
1508     }
1509     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1510 
1511     /* receive message of column indices */
1512     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1513     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1514     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1515   }
1516 
1517   /* loop over local rows, determining number of off diagonal entries */
1518   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1519   jj = 0;
1520   for (i=0; i<m; i++) {
1521     for (j=0; j<ourlens[i]; j++) {
1522       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1523       jj++;
1524     }
1525   }
1526 
1527   /* create our matrix */
1528   for (i=0; i<m; i++) {
1529     ourlens[i] -= offlens[i];
1530   }
1531   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1532   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1533   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1534   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1535   A = *newmat;
1536   for (i=0; i<m; i++) {
1537     ourlens[i] += offlens[i];
1538   }
1539 
1540   if (!rank) {
1541     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1542 
1543     /* read in my part of the matrix numerical values  */
1544     nz = procsnz[0];
1545     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1546 
1547     /* insert into matrix */
1548     jj      = rstart;
1549     smycols = mycols;
1550     svals   = vals;
1551     for (i=0; i<m; i++) {
1552       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1553       smycols += ourlens[i];
1554       svals   += ourlens[i];
1555       jj++;
1556     }
1557 
1558     /* read in the other processors' parts and ship them out */
1559     for (i=1; i<size; i++) {
1560       nz   = procsnz[i];
1561       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1562       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1563     }
1564     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1565   } else {
1566     /* receive numeric values */
1567     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1568 
1569     /* receive message of values */
1570     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1571     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1572     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1573 
1574     /* insert into matrix */
1575     jj      = rstart;
1576     smycols = mycols;
1577     svals   = vals;
1578     for (i=0; i<m; i++) {
1579       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1580       smycols += ourlens[i];
1581       svals   += ourlens[i];
1582       jj++;
1583     }
1584   }
1585   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1586   ierr = PetscFree(vals);CHKERRQ(ierr);
1587   ierr = PetscFree(mycols);CHKERRQ(ierr);
1588   ierr = PetscFree(rowners);CHKERRQ(ierr);
1589 
1590   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1591   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatEqual_MPIDense"
1597 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1598 {
1599   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1600   Mat            a,b;
1601   PetscTruth     flg;
1602   PetscErrorCode ierr;
1603 
1604   PetscFunctionBegin;
1605   a = matA->A;
1606   b = matB->A;
1607   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
       ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 
1612 
1613