xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
36               stores the locally owned rows; for a sequential dense matrix it returns the matrix itself.
37 
38     Input Parameter:
39 .      A - the Seq or MPI dense matrix
40 
41     Output Parameter:
42 .      B - the inner matrix
43 
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56   if (!flg) {  /* this type check is fragile and should be improved */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
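
/*
   Example usage (a sketch, not part of the original source; the usual
   ierr/CHKERRQ error handling is assumed): obtain the process-local
   sequential block and access its column-major storage directly.

      Mat         A,Aloc;
      PetscScalar *v;

      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
      ....  v is the column-major local storage ....
      ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);

   No new reference is created, so Aloc must not be destroyed by the caller.
*/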
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows currently supported");
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
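
/*
   Example usage (a sketch; the variable names are illustrative): since
   MatGetRow() on a parallel dense matrix supports only locally owned rows,
   iterate over the ownership range.

      PetscInt          row,rstart,rend,nz;
      const PetscInt    *cols;
      const PetscScalar *vals;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
        ....  use the nz entries in cols[] and vals[] ....
        ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
      }
*/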
97 
98 EXTERN_C_BEGIN
99 #undef __FUNCT__
100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102 {
103   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
106   PetscScalar    *array;
107   MPI_Comm       comm;
108 
109   PetscFunctionBegin;
110   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
111 
112   /* The reuse aspect is not implemented efficiently */
113   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
114 
115   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
116   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
117   ierr = MatCreate(comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
119   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
120   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
121   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
122   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 
125   *iscopy = PETSC_TRUE;
126   PetscFunctionReturn(0);
127 }
128 EXTERN_C_END
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetValues_MPIDense"
132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133 {
134   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
135   PetscErrorCode ierr;
136   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137   PetscTruth     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue;
142     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
147       } else {
148         for (j=0; j<n; j++) {
149           if (idxn[j] < 0) continue;
150           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
152         }
153       }
154     } else {
155       if (!A->donotstash) {
156         if (roworiented) {
157           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
158         } else {
159           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
160         }
161       }
162     }
163   }
164   PetscFunctionReturn(0);
165 }
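
/*
   Example usage (a sketch): values destined for rows owned by another process
   are cached in the stash (unless MAT_IGNORE_OFF_PROC_ENTRIES was set) and are
   communicated only during assembly, so any sequence of MatSetValues() calls
   must be followed by the two assembly calls.

      ierr = MatSetValues(A,1,&i,1,&j,&value,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/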
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
178     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
183         if (idxn[j] >= mat->cmap.N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210 {
211   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
213   PetscErrorCode ierr;
214   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215   PetscScalar    *av,*bv,*v = lmat->v;
216   Mat            newmat;
217 
218   PetscFunctionBegin;
219   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
220   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
221   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
222   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
223 
224   /* No parallel redistribution is currently supported! We should really check each index
225      set to confirm that it is OK. Currently only submatrices with the same row partitioning
226      as the original matrix are supported! */
227 
228   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
230 
231   /* Check submatrix call */
232   if (scall == MAT_REUSE_MATRIX) {
233     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234     /* We really need to check that the row and column sizes match! */
235     newmat = *B;
236   } else {
237     /* Create and fill new matrix */
238     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
239     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
240     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
241     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
242   }
243 
244   /* Now extract the data pointers and do the copy, column at a time */
245   newmatd = (Mat_MPIDense*)newmat->data;
246   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247 
248   for (i=0; i<ncols; i++) {
249     av = v + nlrows*icol[i];
250     for (j=0; j<nrows; j++) {
251       *bv++ = av[irow[j] - rstart];
252     }
253   }
254 
255   /* Assemble the matrices so that the correct flags are set */
256   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258 
259   /* Free work space */
260   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
261   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
262   *B = newmat;
263   PetscFunctionReturn(0);
264 }
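
/*
   Layout note for the copy above (a worked example, not from the original
   source): the local dense block is stored column major with leading
   dimension nlrows, so local entry (i,j) lives at v[i + j*nlrows].  For a
   2x3 local block the storage is v = {a00,a10, a01,a11, a02,a12}; column
   icol[i] therefore starts at v + nlrows*icol[i] (the av above), and row
   irow[j] of that column is av[irow[j] - rstart] after converting the
   global row index to a local one.
*/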
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279   MPI_Comm       comm = ((PetscObject)mat)->comm;
280   PetscErrorCode ierr;
281   PetscInt       nstash,reallocs;
282   InsertMode     addv;
283 
284   PetscFunctionBegin;
285   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
286   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
287   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289   }
290   mat->insertmode = addv; /* in case this processor had no cache */
291 
292   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303   PetscErrorCode  ierr;
304   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305   PetscMPIInt     n;
306   PetscScalar     *val;
307   InsertMode      addv=mat->insertmode;
308 
309   PetscFunctionBegin;
310   /*  wait on receives */
311   while (1) {
312     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
313     if (!flg) break;
314 
315     for (i=0; i<n;) {
316       /* Now identify the consecutive vals belonging to the same row */
317       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318       if (j < n) ncols = j-i;
319       else       ncols = n-i;
320       /* Now assemble all these values with a single function call */
321       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
322       i = j;
323     }
324   }
325   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326 
327   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* the code does not handle the diagonal entries correctly unless the
349    matrix is square and the column and row ownerships are identical.
350    This is a BUG. The only way to fix it seems to be to access
351    the local matrix directly and not go through the MatZeroRows()
352    routine.
353 */
354 #undef __FUNCT__
355 #define __FUNCT__ "MatZeroRows_MPIDense"
356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357 {
358   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
359   PetscErrorCode ierr;
360   PetscInt       i,*owners = A->rmap.range;
361   PetscInt       *nprocs,j,idx,nsends;
362   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
364   PetscInt       *lens,*lrows,*values;
365   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366   MPI_Comm       comm = ((PetscObject)A)->comm;
367   MPI_Request    *send_waits,*recv_waits;
368   MPI_Status     recv_status,*send_status;
369   PetscTruth     found;
370 
371   PetscFunctionBegin;
372   /*  first count number of contributors to each processor */
373   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
374   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
376   for (i=0; i<N; i++) {
377     idx = rows[i];
378     found = PETSC_FALSE;
379     for (j=0; j<size; j++) {
380       if (idx >= owners[j] && idx < owners[j+1]) {
381         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382       }
383     }
384     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385   }
386   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
387 
388   /* inform other processors of number of messages and max length*/
389   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
390 
391   /* post receives:   */
392   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
394   for (i=0; i<nrecvs; i++) {
395     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
396   }
397 
398   /* do sends:
399       1) starts[i] gives the starting index in svalues for stuff going to
400          the ith processor
401   */
402   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
403   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
404   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
405   starts[0]  = 0;
406   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   for (i=0; i<N; i++) {
408     svalues[starts[owner[i]]++] = rows[i];
409   }
410 
411   starts[0] = 0;
412   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413   count = 0;
414   for (i=0; i<size; i++) {
415     if (nprocs[2*i+1]) {
416       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
417     }
418   }
419   ierr = PetscFree(starts);CHKERRQ(ierr);
420 
421   base = owners[rank];
422 
423   /*  wait on receives */
424   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
425   source = lens + nrecvs;
426   count  = nrecvs; slen = 0;
427   while (count) {
428     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
429     /* unpack receives into our local space */
430     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
431     source[imdex]  = recv_status.MPI_SOURCE;
432     lens[imdex]    = n;
433     slen += n;
434     count--;
435   }
436   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
437 
438   /* move the data into the send scatter */
439   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
440   count = 0;
441   for (i=0; i<nrecvs; i++) {
442     values = rvalues + i*nmax;
443     for (j=0; j<lens[i]; j++) {
444       lrows[count++] = values[j] - base;
445     }
446   }
447   ierr = PetscFree(rvalues);CHKERRQ(ierr);
448   ierr = PetscFree(lens);CHKERRQ(ierr);
449   ierr = PetscFree(owner);CHKERRQ(ierr);
450   ierr = PetscFree(nprocs);CHKERRQ(ierr);
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
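
/*
   Example usage (a sketch; the row numbers are hypothetical): rows are given
   by their GLOBAL indices and any process may list any row; the rendezvous
   above routes each index to its owning process.  A typical use is
   eliminating rows for Dirichlet boundary conditions.

      PetscInt rows[3] = {0,5,7};
      ierr = MatZeroRows(A,3,rows,1.0);CHKERRQ(ierr);

   Note the caveat above: the diagonal entry is handled correctly only for
   square matrices whose row and column ownerships are identical.
*/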
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
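
/*
   Note on the multiply above (added commentary): each process stores all
   cmap.N columns of its rmap.n local rows, so the entire input vector xx is
   first gathered into the local work vector lvec through the Mvctx scatter;
   the product is then a purely local dense multiply by mdn->A.
*/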
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap.n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
543   radd = A->rmap.rstart*m;
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
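
/*
   Index arithmetic for the loop above (a worked check, not from the original
   source): the local block is column major with leading dimension m, so local
   entry (i,j) is aloc->v[i + j*m].  The i-th local diagonal entry is the
   global entry (rstart+i,rstart+i), which is local entry (i,rstart+i) because
   the local block holds all N columns.  Hence
        v[i + (rstart+i)*m] = v[rstart*m + i*m + i] = v[radd + i*m + i].
*/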
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 
558   PetscFunctionBegin;
559 
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
565   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
566   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
567   if (mdn->factor) {
568     ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
569     ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
570     ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
571     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(mdn);CHKERRQ(ierr);
574   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatView_MPIDense_Binary"
585 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
586 {
587   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
588   PetscErrorCode    ierr;
589   PetscViewerFormat format;
590   int               fd;
591   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
592   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
593   PetscScalar       *work,*v,*vv;
594   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
595   MPI_Status        status;
596 
597   PetscFunctionBegin;
598   if (mdn->size == 1) {
599     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
600   } else {
601     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
602     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
603     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
604 
605     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
606     if (format == PETSC_VIEWER_BINARY_NATIVE) {
607 
608       if (!rank) {
609 	/* store the matrix as a dense matrix */
610 	header[0] = MAT_FILE_COOKIE;
611 	header[1] = mat->rmap.N;
612 	header[2] = N;
613 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
614 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
615 
616 	/* get largest work array needed for transposing array */
617         mmax = mat->rmap.n;
618         for (i=1; i<size; i++) {
619           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
620         }
621 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
622 
623 	/* write out local array, by rows */
624         m    = mat->rmap.n;
625 	v    = a->v;
626         for (j=0; j<N; j++) {
627           for (i=0; i<m; i++) {
628 	    work[j + i*N] = *v++;
629 	  }
630 	}
631 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
632         /* get the largest work array needed to receive messages from the other processes (excluding process zero) */
633         mmax = 0;
634         for (i=1; i<size; i++) {
635           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
636         }
637 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
638         v = vv;
639         for (k=1; k<size; k++) {
640           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
641           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
642 
643           for (j=0; j<N; j++) {
644             for (i=0; i<m; i++) {
645               work[j + i*N] = *v++;
646 	    }
647 	  }
648 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
649         }
650         ierr = PetscFree(work);CHKERRQ(ierr);
651         ierr = PetscFree(vv);CHKERRQ(ierr);
652       } else {
653         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
654       }
655     }
656   }
657   PetscFunctionReturn(0);
658 }
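
/*
   On-disk layout produced above for PETSC_VIEWER_BINARY_NATIVE (a summary
   derived from the code): a four-entry PetscInt header

        MAT_FILE_COOKIE, M, N, MATRIX_BINARY_FORMAT_DENSE

   followed by all M*N values in row-major order.  The values are written one
   ownership block at a time; each block is transposed from the column-major
   local storage into work[] before the PetscBinaryWrite().
*/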
659 
660 #undef __FUNCT__
661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663 {
664   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665   PetscErrorCode    ierr;
666   PetscMPIInt       size = mdn->size,rank = mdn->rank;
667   PetscViewerType   vtype;
668   PetscTruth        iascii,isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
674   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
675   if (iascii) {
676     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
677     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
678     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679       MatInfo info;
680       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
681       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
682                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
683       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
684       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
685       PetscFunctionReturn(0);
686     } else if (format == PETSC_VIEWER_ASCII_INFO) {
687       PetscFunctionReturn(0);
688     }
689   } else if (isdraw) {
690     PetscDraw  draw;
691     PetscTruth isnull;
692 
693     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
694     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
695     if (isnull) PetscFunctionReturn(0);
696   }
697 
698   if (size == 1) {
699     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
700   } else {
701     /* assemble the entire matrix onto the first processor */
702     Mat         A;
703     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
704     PetscInt    *cols;
705     PetscScalar *vals;
706 
707     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
708     if (!rank) {
709       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
710     } else {
711       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
712     }
713     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
714     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
715     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
716     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
717 
718     /* Copy the matrix ... This isn't the most efficient means,
719        but it's quick for now */
720     A->insertmode = INSERT_VALUES;
721     row = mat->rmap.rstart; m = mdn->A->rmap.n;
722     for (i=0; i<m; i++) {
723       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
724       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
725       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
726       row++;
727     }
728 
729     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
730     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
731     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     if (!rank) {
733       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
734     }
735     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
736     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
737     ierr = MatDestroy(A);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPIDense"
744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745 {
746   PetscErrorCode ierr;
747   PetscTruth     iascii,isbinary,isdraw,issocket;
748 
749   PetscFunctionBegin;
750 
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   } else {
761     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatGetInfo_MPIDense"
768 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
769 {
770   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
771   Mat            mdn = mat->A;
772   PetscErrorCode ierr;
773   PetscReal      isend[5],irecv[5];
774 
775   PetscFunctionBegin;
776   info->rows_global    = (double)A->rmap.N;
777   info->columns_global = (double)A->cmap.N;
778   info->rows_local     = (double)A->rmap.n;
779   info->columns_local  = (double)A->cmap.N;
780   info->block_size     = 1.0;
781   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
782   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
783   isend[3] = info->memory;  isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   } else if (flag == MAT_GLOBAL_SUM) {
798     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
799     info->nz_used      = irecv[0];
800     info->nz_allocated = irecv[1];
801     info->nz_unneeded  = irecv[2];
802     info->memory       = irecv[3];
803     info->mallocs      = irecv[4];
804   }
805   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
806   info->fill_ratio_needed = 0;
807   info->factor_mallocs    = 0;
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSetOption_MPIDense"
813 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
814 {
815   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
816   PetscErrorCode ierr;
817 
818   PetscFunctionBegin;
819   switch (op) {
820   case MAT_NEW_NONZERO_LOCATIONS:
821   case MAT_NEW_NONZERO_LOCATION_ERR:
822   case MAT_NEW_NONZERO_ALLOCATION_ERR:
823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
824     break;
825   case MAT_ROW_ORIENTED:
826     a->roworiented = flg;
827     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
828     break;
829   case MAT_NEW_DIAGONALS:
830   case MAT_USE_HASH_TABLE:
831     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
832     break;
833   case MAT_IGNORE_OFF_PROC_ENTRIES:
834     a->donotstash = flg;
835     break;
836   case MAT_SYMMETRIC:
837   case MAT_STRUCTURALLY_SYMMETRIC:
838   case MAT_HERMITIAN:
839   case MAT_SYMMETRY_ETERNAL:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) { (*v++) *= x;}
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
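
/*
   Example usage (a sketch): scale A in place to diag(ll)*A*diag(rr).  ll must
   be distributed like the rows of A and rr like its columns; either argument
   may be PETSC_NULL to skip that side of the scaling.

      ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,ll,PETSC_NULL);CHKERRQ(ierr);    rows only
*/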
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatNorm_MPIDense"
892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893 {
894   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
896   PetscErrorCode ierr;
897   PetscInt       i,j;
898   PetscReal      sum = 0.0;
899   PetscScalar    *v = mat->v;
900 
901   PetscFunctionBegin;
902   if (mdn->size == 1) {
903     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
904   } else {
905     if (type == NORM_FROBENIUS) {
906       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
907 #if defined(PETSC_USE_COMPLEX)
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909 #else
910         sum += (*v)*(*v); v++;
911 #endif
912       }
913       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
914       *nrm = sqrt(*nrm);
915       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
916     } else if (type == NORM_1) {
917       PetscReal *tmp,*tmp2;
918       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
919       tmp2 = tmp + A->cmap.N;
920       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
921       *nrm = 0.0;
922       v = mat->v;
923       for (j=0; j<mdn->A->cmap.n; j++) {
924         for (i=0; i<mdn->A->rmap.n; i++) {
925           tmp[j] += PetscAbsScalar(*v);  v++;
926         }
927       }
928       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       for (j=0; j<A->cmap.N; j++) {
930         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931       }
932       ierr = PetscFree(tmp);CHKERRQ(ierr);
933       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
934     } else if (type == NORM_INFINITY) { /* max row norm */
935       PetscReal ntemp;
936       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
937       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
938     } else {
939       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
940     }
941   }
942   PetscFunctionReturn(0);
943 }
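
/*
   Example usage (a sketch): only NORM_FROBENIUS, NORM_1 and NORM_INFINITY are
   supported by this routine; NORM_2 raises PETSC_ERR_SUP.

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/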
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_MPIDense"
947 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
948 {
949   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951   Mat            B;
952   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
953   PetscErrorCode ierr;
954   PetscInt       j,i;
955   PetscScalar    *v;
956 
957   PetscFunctionBegin;
958   if (!matout && M != N) {
959     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
960   }
961   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
962   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
963   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
964   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
965 
966   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
967   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
968   for (i=0; i<m; i++) rwork[i] = rstart + i;
969   for (j=0; j<n; j++) {
970     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
971     v   += m;
972   }
973   ierr = PetscFree(rwork);CHKERRQ(ierr);
974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   if (matout) {
977     *matout = B;
978   } else {
979     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 #include "petscblaslapack.h"
985 #undef __FUNCT__
986 #define __FUNCT__ "MatScale_MPIDense"
987 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
988 {
989   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
990   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
991   PetscScalar    oalpha = alpha;
992   PetscErrorCode ierr;
993   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
994 
995   PetscFunctionBegin;
996   BLASscal_(&nz,&oalpha,a->v,&one);
997   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1005 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1006 {
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1016 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1017 {
1018   PetscErrorCode ierr;
1019   PetscInt       m=A->rmap.n,n=B->cmap.n;
1020   Mat            Cmat;
1021 
1022   PetscFunctionBegin;
1023   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1024   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1025   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1026   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1027   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1028   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1030   *C = Cmat;
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 /* -------------------------------------------------------------------*/
1035 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1036        MatGetRow_MPIDense,
1037        MatRestoreRow_MPIDense,
1038        MatMult_MPIDense,
1039 /* 4*/ MatMultAdd_MPIDense,
1040        MatMultTranspose_MPIDense,
1041        MatMultTransposeAdd_MPIDense,
1042        0,
1043        0,
1044        0,
1045 /*10*/ 0,
1046        0,
1047        0,
1048        0,
1049        MatTranspose_MPIDense,
1050 /*15*/ MatGetInfo_MPIDense,
1051        MatEqual_MPIDense,
1052        MatGetDiagonal_MPIDense,
1053        MatDiagonalScale_MPIDense,
1054        MatNorm_MPIDense,
1055 /*20*/ MatAssemblyBegin_MPIDense,
1056        MatAssemblyEnd_MPIDense,
1057        0,
1058        MatSetOption_MPIDense,
1059        MatZeroEntries_MPIDense,
1060 /*25*/ MatZeroRows_MPIDense,
1061        MatLUFactorSymbolic_MPIDense,
1062        0,
1063        MatCholeskyFactorSymbolic_MPIDense,
1064        0,
1065 /*30*/ MatSetUpPreallocation_MPIDense,
1066        0,
1067        0,
1068        MatGetArray_MPIDense,
1069        MatRestoreArray_MPIDense,
1070 /*35*/ MatDuplicate_MPIDense,
1071        0,
1072        0,
1073        0,
1074        0,
1075 /*40*/ 0,
1076        MatGetSubMatrices_MPIDense,
1077        0,
1078        MatGetValues_MPIDense,
1079        0,
1080 /*45*/ 0,
1081        MatScale_MPIDense,
1082        0,
1083        0,
1084        0,
1085 /*50*/ 0,
1086        0,
1087        0,
1088        0,
1089        0,
1090 /*55*/ 0,
1091        0,
1092        0,
1093        0,
1094        0,
1095 /*60*/ MatGetSubMatrix_MPIDense,
1096        MatDestroy_MPIDense,
1097        MatView_MPIDense,
1098        0,
1099        0,
1100 /*65*/ 0,
1101        0,
1102        0,
1103        0,
1104        0,
1105 /*70*/ 0,
1106        0,
1107        0,
1108        0,
1109        0,
1110 /*75*/ 0,
1111        0,
1112        0,
1113        0,
1114        0,
1115 /*80*/ 0,
1116        0,
1117        0,
1118        0,
1119 /*84*/ MatLoad_MPIDense,
1120        0,
1121        0,
1122        0,
1123        0,
1124        0,
1125 /*90*/ 0,
1126        MatMatMultSymbolic_MPIDense_MPIDense,
1127        0,
1128        0,
1129        0,
1130 /*95*/ 0,
1131        0,
1132        0,
1133        0};
1134 
1135 EXTERN_C_BEGIN
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1138 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1139 {
1140   Mat_MPIDense   *a;
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   mat->preallocated = PETSC_TRUE;
1145   /* Note:  For now, when data is provided, this assumes the user has correctly
1146    allocated the local dense storage space.  We should add error checking. */
1147 
1148   a    = (Mat_MPIDense*)mat->data;
1149   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1150   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1151   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1152   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1153   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 EXTERN_C_END
1157 
1158 /*MC
1159    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1160 
1161    Options Database Keys:
1162 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1163 
1164   Level: beginner
1165 
1166 .seealso: MatCreateMPIDense
1167 M*/
1168 
1169 EXTERN_C_BEGIN
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatCreate_MPIDense"
1172 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1173 {
1174   Mat_MPIDense   *a;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1179   mat->data         = (void*)a;
1180   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1181   mat->factor       = 0;
1182   mat->mapping      = 0;
1183 
1184   a->factor       = 0;
1185   mat->insertmode = NOT_SET_VALUES;
1186   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1187   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1188 
1189   mat->rmap.bs = mat->cmap.bs = 1;
1190   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1191   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1192   a->nvec = mat->cmap.n;
1193 
1194   /* build cache for off array entries formed */
1195   a->donotstash = PETSC_FALSE;
1196   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1197 
1198   /* stuff used for matrix vector multiply */
1199   a->lvec        = 0;
1200   a->Mvctx       = 0;
1201   a->roworiented = PETSC_TRUE;
1202 
1203   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1204                                      "MatGetDiagonalBlock_MPIDense",
1205                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1206   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1207                                      "MatMPIDenseSetPreallocation_MPIDense",
1208                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1209   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1210                                      "MatMatMult_MPIAIJ_MPIDense",
1211                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1213                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1214                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1216                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1217                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1218   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 EXTERN_C_END
1222 
1223 /*MC
1224    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1225 
1226    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1227    and MATMPIDENSE otherwise.
1228 
1229    Options Database Keys:
1230 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1231 
1232   Level: beginner
1233 
1234 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1235 M*/
1236 
1237 EXTERN_C_BEGIN
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatCreate_Dense"
1240 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1241 {
1242   PetscErrorCode ierr;
1243   PetscMPIInt    size;
1244 
1245   PetscFunctionBegin;
1246   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1247   if (size == 1) {
1248     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1249   } else {
1250     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1251   }
1252   PetscFunctionReturn(0);
1253 }
1254 EXTERN_C_END
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1258 /*@C
1259    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1260 
1261    Not collective
1262 
1263    Input Parameters:
1264 +  A - the matrix
1265 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1266    to control all matrix memory allocation.
1267 
1268    Notes:
1269    The dense format is fully compatible with standard Fortran 77
1270    storage by columns.
1271 
1272    The data input variable is intended primarily for Fortran programmers
1273    who wish to allocate their own matrix memory space.  Most users should
1274    set data=PETSC_NULL.
1275 
1276    Level: intermediate
1277 
1278 .keywords: matrix,dense, parallel
1279 
1280 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1281 @*/
1282 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1283 {
1284   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1285 
1286   PetscFunctionBegin;
1287   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1288   if (f) {
1289     ierr = (*f)(mat,data);CHKERRQ(ierr);
1290   }
1291   PetscFunctionReturn(0);
1292 }
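
/*
   Example usage (a sketch): the typical creation sequence, letting PETSc
   allocate the local dense storage.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/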
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatCreateMPIDense"
1296 /*@C
1297    MatCreateMPIDense - Creates a parallel matrix in dense format.
1298 
1299    Collective on MPI_Comm
1300 
1301    Input Parameters:
1302 +  comm - MPI communicator
1303 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1304 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1305 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1306 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1307 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1308    to control all matrix memory allocation.
1309 
1310    Output Parameter:
1311 .  A - the matrix
1312 
1313    Notes:
1314    The dense format is fully compatible with standard Fortran 77
1315    storage by columns.
1316 
1317    The data input variable is intended primarily for Fortran programmers
1318    who wish to allocate their own matrix memory space.  Most users should
1319    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1320 
1321    The user MUST specify either the local or global matrix dimensions
1322    (possibly both).
1323 
1324    Level: intermediate
1325 
1326 .keywords: matrix,dense, parallel
1327 
1328 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1329 @*/
1330 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1331 {
1332   PetscErrorCode ierr;
1333   PetscMPIInt    size;
1334 
1335   PetscFunctionBegin;
1336   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1337   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1338   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1339   if (size > 1) {
1340     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1341     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1342   } else {
1343     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1344     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1345   }
1346   PetscFunctionReturn(0);
1347 }
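
/*
   Example usage (a sketch): create a 100 x 100 dense matrix, letting PETSc
   choose the row distribution and allocate the values.

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,
                               100,100,PETSC_NULL,&A);CHKERRQ(ierr);
*/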
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatDuplicate_MPIDense"
1351 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1352 {
1353   Mat            mat;
1354   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1355   PetscErrorCode ierr;
1356 
1357   PetscFunctionBegin;
1358   *newmat       = 0;
1359   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1360   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1361   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1362   a                 = (Mat_MPIDense*)mat->data;
1363   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1364   mat->factor       = A->factor;
1365   mat->assembled    = PETSC_TRUE;
1366   mat->preallocated = PETSC_TRUE;
1367 
1368   mat->rmap.rstart     = A->rmap.rstart;
1369   mat->rmap.rend       = A->rmap.rend;
1370   a->size              = oldmat->size;
1371   a->rank              = oldmat->rank;
1372   mat->insertmode      = NOT_SET_VALUES;
1373   a->nvec              = oldmat->nvec;
1374   a->donotstash        = oldmat->donotstash;
1375 
1376   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1377   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1378   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1379 
1380   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1381   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1382   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1383   *newmat = mat;
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #include "petscsys.h"
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1391 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1392 {
1393   PetscErrorCode ierr;
1394   PetscMPIInt    rank,size;
1395   PetscInt       *rowners,i,m,nz,j;
1396   PetscScalar    *array,*vals,*vals_ptr;
1397   MPI_Status     status;
1398 
1399   PetscFunctionBegin;
1400   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1401   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1402 
1403   /* determine ownership of all rows */
1404   m          = M/size + ((M % size) > rank);
1405   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1406   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1407   rowners[0] = 0;
1408   for (i=2; i<=size; i++) {
1409     rowners[i] += rowners[i-1];
1410   }
1411 
1412   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1413   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1414   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1415   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1416   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1417 
1418   if (!rank) {
1419     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1420 
1421     /* read in my part of the matrix numerical values  */
1422     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1423 
1424     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1425     vals_ptr = vals;
1426     for (i=0; i<m; i++) {
1427       for (j=0; j<N; j++) {
1428         array[i + j*m] = *vals_ptr++;
1429       }
1430     }
1431 
1432     /* read in the other processors' parts and ship them out */
1433     for (i=1; i<size; i++) {
1434       nz   = (rowners[i+1] - rowners[i])*N;
1435       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1436       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1437     }
1438   } else {
1439     /* receive numeric values */
1440     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1441 
1442     /* receive message of values*/
1443     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1444 
1445     /* insert into the matrix row by row (this is why we cannot read directly into array) */
1446     vals_ptr = vals;
1447     for (i=0; i<m; i++) {
1448       for (j=0; j<N; j++) {
1449         array[i + j*m] = *vals_ptr++;
1450       }
1451     }
1452   }
1453   ierr = PetscFree(rowners);CHKERRQ(ierr);
1454   ierr = PetscFree(vals);CHKERRQ(ierr);
1455   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1456   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatLoad_MPIDense"
1462 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1463 {
1464   Mat            A;
1465   PetscScalar    *vals,*svals;
1466   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1467   MPI_Status     status;
1468   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1469   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1470   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1471   PetscInt       i,nz,j,rstart,rend;
1472   int            fd;
1473   PetscErrorCode ierr;
1474 
1475   PetscFunctionBegin;
1476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1477   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1478   if (!rank) {
1479     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1480     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1481     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
1482   }
1483 
1484   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1485   M = header[1]; N = header[2]; nz = header[3];
1486 
1487   /*
1488        Handle case where matrix is stored on disk as a dense matrix
1489   */
1490   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1491     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1492     PetscFunctionReturn(0);
1493   }
1494 
1495   /* determine ownership of all rows */
1496   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1497   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1498   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1499   rowners[0] = 0;
1500   for (i=2; i<=size; i++) {
1501     rowners[i] += rowners[i-1];
1502   }
1503   rstart = rowners[rank];
1504   rend   = rowners[rank+1];
1505 
1506   /* distribute row lengths to all processors */
1507   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1508   offlens = ourlens + (rend-rstart);
1509   if (!rank) {
1510     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1511     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1512     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1513     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1514     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1515     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1516   } else {
1517     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1518   }
1519 
1520   if (!rank) {
1521     /* calculate the number of nonzeros on each processor */
1522     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1523     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1524     for (i=0; i<size; i++) {
1525       for (j=rowners[i]; j< rowners[i+1]; j++) {
1526         procsnz[i] += rowlengths[j];
1527       }
1528     }
1529     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1530 
1531     /* determine max buffer needed and allocate it */
1532     maxnz = 0;
1533     for (i=0; i<size; i++) {
1534       maxnz = PetscMax(maxnz,procsnz[i]);
1535     }
1536     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1537 
1538     /* read in my part of the matrix column indices  */
1539     nz = procsnz[0];
1540     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1541     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1542 
1543     /* read in everyone else's parts and ship them off */
1544     for (i=1; i<size; i++) {
1545       nz   = procsnz[i];
1546       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1547       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1548     }
1549     ierr = PetscFree(cols);CHKERRQ(ierr);
1550   } else {
1551     /* determine buffer space needed for message */
1552     nz = 0;
1553     for (i=0; i<m; i++) {
1554       nz += ourlens[i];
1555     }
1556     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1557 
1558     /* receive message of column indices*/
1559     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1560     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1561     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1562   }
1563 
1564   /* loop over local rows, determining number of off diagonal entries */
1565   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1566   jj = 0;
1567   for (i=0; i<m; i++) {
1568     for (j=0; j<ourlens[i]; j++) {
1569       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1570       jj++;
1571     }
1572   }
1573 
1574   /* create our matrix */
1575   for (i=0; i<m; i++) {
1576     ourlens[i] -= offlens[i];
1577   }
1578   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1579   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1580   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1581   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1582   A = *newmat;
1583   for (i=0; i<m; i++) {
1584     ourlens[i] += offlens[i];
1585   }
1586 
1587   if (!rank) {
1588     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1589 
1590     /* read in my part of the matrix numerical values  */
1591     nz = procsnz[0];
1592     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1593 
1594     /* insert into matrix */
1595     jj      = rstart;
1596     smycols = mycols;
1597     svals   = vals;
1598     for (i=0; i<m; i++) {
1599       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1600       smycols += ourlens[i];
1601       svals   += ourlens[i];
1602       jj++;
1603     }
1604 
1605     /* read in the other processors' parts and ship them out */
1606     for (i=1; i<size; i++) {
1607       nz   = procsnz[i];
1608       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1609       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1610     }
1611     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1612   } else {
1613     /* receive numeric values */
1614     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1615 
1616     /* receive message of values*/
1617     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1618     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1619     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
1620 
1621     /* insert into matrix */
1622     jj      = rstart;
1623     smycols = mycols;
1624     svals   = vals;
1625     for (i=0; i<m; i++) {
1626       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1627       smycols += ourlens[i];
1628       svals   += ourlens[i];
1629       jj++;
1630     }
1631   }
1632   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1633   ierr = PetscFree(vals);CHKERRQ(ierr);
1634   ierr = PetscFree(mycols);CHKERRQ(ierr);
1635   ierr = PetscFree(rowners);CHKERRQ(ierr);
1636 
1637   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1638   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1639   PetscFunctionReturn(0);
1640 }
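
/*
   Example usage (a sketch, assuming this version's MatLoad()/viewer calling
   sequence; the file name is purely illustrative):

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/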
1641 
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatEqual_MPIDense"
1644 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1645 {
1646   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1647   Mat            a,b;
1648   PetscTruth     flg;
1649   PetscErrorCode ierr;
1650 
1651   PetscFunctionBegin;
1652   a = matA->A;
1653   b = matB->A;
1654   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1655   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 
1660 
1661