xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 38f2d2fdb3b6f522a3102c6eb796cebecf3224c0)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
11 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12 {
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
22 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 
31 #undef __FUNCT__
32 #define __FUNCT__ "MatDenseGetLocalMatrix"
33 /*@
34 
35       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential dense matrix
36               that stores the locally owned part of the operator. For a MATSEQDENSE matrix it returns the matrix itself.
37 
38     Input Parameter:
39 .      A - the MATSEQDENSE or MATMPIDENSE matrix
40 
41     Output Parameter:
42 .      B - the inner sequential dense matrix (for a sequential matrix, B is A itself)
43 
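    Example usage (a minimal sketch; A is assumed to be an already created and assembled
    dense matrix, and Aloc is an illustrative name):
.vb
      Mat Aloc;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      /* Aloc is the process-local sequential dense matrix; it is not a copy, so do not destroy it */
.ve
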
44     Level: intermediate
45 
46 @*/
47 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48 {
49   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
50   PetscErrorCode ierr;
51   PetscTruth     flg;
52   PetscMPIInt    size;
53 
54   PetscFunctionBegin;
55   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
56   if (!flg) {  /* this type check is crude and should be replaced by a proper type query */
57     ierr = PetscTypeCompare((PetscObject)A,MATDENSE,&flg);CHKERRQ(ierr);
58     if (flg) {
59       ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
60       if (size == 1) flg = PETSC_FALSE;
61     }
62   }
63   if (flg) {
64     *B = mat->A;
65   } else {
66     *B = A;
67   }
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "MatGetRow_MPIDense"
73 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74 {
75   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
76   PetscErrorCode ierr;
77   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
78 
79   PetscFunctionBegin;
80   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
81   lrow = row - rstart;
82   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
83   PetscFunctionReturn(0);
84 }
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatRestoreRow_MPIDense"
88 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89 {
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
94   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
95   PetscFunctionReturn(0);
96 }
97 
98 EXTERN_C_BEGIN
99 #undef __FUNCT__
100 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
101 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102 {
103   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
104   PetscErrorCode ierr;
105   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
106   PetscScalar    *array;
107   MPI_Comm       comm;
108 
109   PetscFunctionBegin;
110   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
111 
112   /* The reuse aspect is not implemented efficiently */
113   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
114 
115   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
116   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
117   ierr = MatCreate(comm,B);CHKERRQ(ierr);
118   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
119   ierr = MatSetType(*B,mdn->A->type_name);CHKERRQ(ierr);
120   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
121   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
122   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
123   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
124 
125   *iscopy = PETSC_TRUE;
126   PetscFunctionReturn(0);
127 }
128 EXTERN_C_END
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSetValues_MPIDense"
132 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133 {
134   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
135   PetscErrorCode ierr;
136   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137   PetscTruth     roworiented = A->roworiented;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue;
142     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       if (roworiented) {
146         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
147       } else {
148         for (j=0; j<n; j++) {
149           if (idxn[j] < 0) continue;
150           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
152         }
153       }
154     } else {
155       if (!A->donotstash) {
156         if (roworiented) {
157           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
158         } else {
159           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
160         }
161       }
162     }
163   }
164   PetscFunctionReturn(0);
165 }
166 
167 #undef __FUNCT__
168 #define __FUNCT__ "MatGetValues_MPIDense"
169 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170 {
171   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
172   PetscErrorCode ierr;
173   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
174 
175   PetscFunctionBegin;
176   for (i=0; i<m; i++) {
177     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
178     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179     if (idxm[i] >= rstart && idxm[i] < rend) {
180       row = idxm[i] - rstart;
181       for (j=0; j<n; j++) {
182         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
183         if (idxn[j] >= mat->cmap.N) {
184           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185         }
186         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
187       }
188     } else {
189       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190     }
191   }
192   PetscFunctionReturn(0);
193 }
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatGetArray_MPIDense"
197 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198 {
199   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
200   PetscErrorCode ierr;
201 
202   PetscFunctionBegin;
203   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
209 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210 {
211   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
213   PetscErrorCode ierr;
214   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215   PetscScalar    *av,*bv,*v = lmat->v;
216   Mat            newmat;
217 
218   PetscFunctionBegin;
219   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
220   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
221   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
222   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
223 
224   /* No parallel redistribution is currently supported! We should really check each index
225      set to confirm that it is valid. Currently only submatrices with the same row
226      partitioning as the original matrix are supported! */
227 
228   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
229   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
230 
231   /* Check submatrix call */
232   if (scall == MAT_REUSE_MATRIX) {
233     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234     /* Really need to test rows and column sizes! */
235     newmat = *B;
236   } else {
237     /* Create and fill new matrix */
238     ierr = MatCreate(A->comm,&newmat);CHKERRQ(ierr);
239     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
240     ierr = MatSetType(newmat,A->type_name);CHKERRQ(ierr);
241     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
242   }
243 
244   /* Now extract the data pointers and do the copy, column at a time */
245   newmatd = (Mat_MPIDense*)newmat->data;
246   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247 
248   for (i=0; i<ncols; i++) {
249     av = v + nlrows*icol[i];
250     for (j=0; j<nrows; j++) {
251       *bv++ = av[irow[j] - rstart];
252     }
253   }
254 
255   /* Assemble the matrices so that the correct flags are set */
256   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258 
259   /* Free work space */
260   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
261   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
262   *B = newmat;
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatRestoreArray_MPIDense"
268 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269 {
270   PetscFunctionBegin;
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
276 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279   MPI_Comm       comm = mat->comm;
280   PetscErrorCode ierr;
281   PetscInt       nstash,reallocs;
282   InsertMode     addv;
283 
284   PetscFunctionBegin;
285   /* make sure all processors are either in INSERTMODE or ADDMODE */
286   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
287   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289   }
290   mat->insertmode = addv; /* in case this processor had no cache */
291 
292   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
300 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301 {
302   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303   PetscErrorCode  ierr;
304   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305   PetscMPIInt     n;
306   PetscScalar     *val;
307   InsertMode      addv=mat->insertmode;
308 
309   PetscFunctionBegin;
310   /*  wait on receives */
311   while (1) {
312     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
313     if (!flg) break;
314 
315     for (i=0; i<n;) {
316       /* Now identify the consecutive vals belonging to the same row */
317       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318       if (j < n) ncols = j-i;
319       else       ncols = n-i;
320       /* Now assemble all these values with a single function call */
321       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
322       i = j;
323     }
324   }
325   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326 
327   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
328   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
329 
330   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
332   }
333   PetscFunctionReturn(0);
334 }
335 
336 #undef __FUNCT__
337 #define __FUNCT__ "MatZeroEntries_MPIDense"
338 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339 {
340   PetscErrorCode ierr;
341   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 /* The code does not handle the diagonal entries correctly unless the
349    matrix is square and the column and row ownerships are identical.
350    This is a BUG. The only way to fix it seems to be to access
351    the local matrix mdn->A directly rather than going through the
352    MatZeroRows() routine.
353 */
354 #undef __FUNCT__
355 #define __FUNCT__ "MatZeroRows_MPIDense"
356 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357 {
358   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
359   PetscErrorCode ierr;
360   PetscInt       i,*owners = A->rmap.range;
361   PetscInt       *nprocs,j,idx,nsends;
362   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
364   PetscInt       *lens,*lrows,*values;
365   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366   MPI_Comm       comm = A->comm;
367   MPI_Request    *send_waits,*recv_waits;
368   MPI_Status     recv_status,*send_status;
369   PetscTruth     found;
370 
371   PetscFunctionBegin;
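  /* Overview of the communication below: (1) count how many of the requested global rows
     belong to each process and let every process know how many messages to expect and their
     maximum length, (2) post the receives and send each owning process the global indices of
     its rows, (3) convert the received global indices to local indices and zero the rows of
     the local sequential matrix with MatZeroRows(). */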
372   /*  first count number of contributors to each processor */
373   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
374   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
375   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
376   for (i=0; i<N; i++) {
377     idx = rows[i];
378     found = PETSC_FALSE;
379     for (j=0; j<size; j++) {
380       if (idx >= owners[j] && idx < owners[j+1]) {
381         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382       }
383     }
384     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385   }
386   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
387 
388   /* inform other processors of number of messages and max length*/
389   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
390 
391   /* post receives:   */
392   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
394   for (i=0; i<nrecvs; i++) {
395     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
396   }
397 
398   /* do sends:
399       1) starts[i] gives the starting index in svalues for stuff going to
400          the ith processor
401   */
402   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
403   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
404   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
405   starts[0]  = 0;
406   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   for (i=0; i<N; i++) {
408     svalues[starts[owner[i]]++] = rows[i];
409   }
410 
411   starts[0] = 0;
412   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413   count = 0;
414   for (i=0; i<size; i++) {
415     if (nprocs[2*i+1]) {
416       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
417     }
418   }
419   ierr = PetscFree(starts);CHKERRQ(ierr);
420 
421   base = owners[rank];
422 
423   /*  wait on receives */
424   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
425   source = lens + nrecvs;
426   count  = nrecvs; slen = 0;
427   while (count) {
428     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
429     /* unpack receives into our local space */
430     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
431     source[imdex]  = recv_status.MPI_SOURCE;
432     lens[imdex]    = n;
433     slen += n;
434     count--;
435   }
436   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
437 
438   /* move the received global row indices into local numbering */
439   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
440   count = 0;
441   for (i=0; i<nrecvs; i++) {
442     values = rvalues + i*nmax;
443     for (j=0; j<lens[i]; j++) {
444       lrows[count++] = values[j] - base;
445     }
446   }
447   ierr = PetscFree(rvalues);CHKERRQ(ierr);
448   ierr = PetscFree(lens);CHKERRQ(ierr);
449   ierr = PetscFree(owner);CHKERRQ(ierr);
450   ierr = PetscFree(nprocs);CHKERRQ(ierr);
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap.n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
543   radd = A->rmap.rstart*m;
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 
558   PetscFunctionBegin;
559 
560 #if defined(PETSC_USE_LOG)
561   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562 #endif
563   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
564   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
565   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
566   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
567   if (mdn->factor) {
568     ierr = PetscFree(mdn->factor->temp);CHKERRQ(ierr);
569     ierr = PetscFree(mdn->factor->tag);CHKERRQ(ierr);
570     ierr = PetscFree(mdn->factor->pivots);CHKERRQ(ierr);
571     ierr = PetscFree(mdn->factor);CHKERRQ(ierr);
572   }
573   ierr = PetscFree(mdn);CHKERRQ(ierr);
574   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatView_MPIDense_Binary"
585 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
586 {
587   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
588   PetscErrorCode    ierr;
589   PetscViewerFormat format;
590   int               fd;
591   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
592   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
593   PetscScalar       *work,*v;
594   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
595   MPI_Status        status;
596 
597   PetscFunctionBegin;
598   if (mdn->size == 1) {
599     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
600   } else {
601     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
602     ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
603     ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
604 
605     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
606     if (format == PETSC_VIEWER_BINARY_NATIVE) {
607 
608       if (!rank) {
609 	/* store the matrix as a dense matrix */
610 	header[0] = MAT_FILE_COOKIE;
611 	header[1] = mat->rmap.N;
612 	header[2] = N;
613 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
614 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
615 
616 	/* get largest work array needed for transposing array */
617         mmax = mat->rmap.n;
618         for (i=1; i<size; i++) {
619           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
620         }
621 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
622 
623 	/* write out local array, by rows */
624         m    = mat->rmap.n;
625 	v    = a->v;
626         for (j=0; j<N; j++) {
627 	  for (i=0; i<m; i++) {
628 	    work[j + i*N] = *v++;
629 	  }
630 	}
631 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
632 
633         /* get largest work array to receive messages from other processes, excludes process zero */
634         mmax = 0;
635         for (i=1; i<size; i++) {
636           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
637         }
638 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&v);CHKERRQ(ierr);
639         for (k=1; k<size; k++) {
640           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
641           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,mat->comm,&status);CHKERRQ(ierr);
642 
643 	  for (j=0; j<N; j++) {
644 	    for (i=0; i<m; i++) {
645 	      work[j + i*N] = v[i + j*m]; /* index v rather than advancing it; v is reused as the receive buffer and freed below */
646 	    }
647 	  }
648 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
649         }
650         ierr = PetscFree(work);CHKERRQ(ierr);
651         ierr = PetscFree(v);CHKERRQ(ierr);
652       } else {
653         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
654       }
655     }
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 #undef __FUNCT__
661 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
662 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663 {
664   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665   PetscErrorCode    ierr;
666   PetscMPIInt       size = mdn->size,rank = mdn->rank;
667   PetscViewerType   vtype;
668   PetscTruth        iascii,isdraw;
669   PetscViewer       sviewer;
670   PetscViewerFormat format;
671 
672   PetscFunctionBegin;
673   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
674   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
675   if (iascii) {
676     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
677     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
678     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679       MatInfo info;
680       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
681       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
682                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
683       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
684       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
685       PetscFunctionReturn(0);
686     } else if (format == PETSC_VIEWER_ASCII_INFO) {
687       PetscFunctionReturn(0);
688     }
689   } else if (isdraw) {
690     PetscDraw  draw;
691     PetscTruth isnull;
692 
693     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
694     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
695     if (isnull) PetscFunctionReturn(0);
696   }
697 
698   if (size == 1) {
699     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
700   } else {
701     /* assemble the entire matrix onto first processor. */
702     Mat         A;
703     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
704     PetscInt    *cols;
705     PetscScalar *vals;
706 
707     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
708     if (!rank) {
709       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
710     } else {
711       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
712     }
713     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
714     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
715     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
716     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
717 
718     /* Copy the matrix ... This isn't the most efficient means,
719        but it's quick for now */
720     A->insertmode = INSERT_VALUES;
721     row = mat->rmap.rstart; m = mdn->A->rmap.n;
722     for (i=0; i<m; i++) {
723       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
724       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
725       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
726       row++;
727     }
728 
729     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
730     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
731     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     if (!rank) {
733       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
734     }
735     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
736     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
737     ierr = MatDestroy(A);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPIDense"
744 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745 {
746   PetscErrorCode ierr;
747   PetscTruth     iascii,isbinary,isdraw,issocket;
748 
749   PetscFunctionBegin;
750 
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
753   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
755 
756   if (iascii || issocket || isdraw) {
757     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
758   } else if (isbinary) {
759     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
760   } else {
761     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
762   }
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "MatGetInfo_MPIDense"
768 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
769 {
770   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
771   Mat            mdn = mat->A;
772   PetscErrorCode ierr;
773   PetscReal      isend[5],irecv[5];
774 
775   PetscFunctionBegin;
776   info->rows_global    = (double)A->rmap.N;
777   info->columns_global = (double)A->cmap.N;
778   info->rows_local     = (double)A->rmap.n;
779   info->columns_local  = (double)A->cmap.N;
780   info->block_size     = 1.0;
781   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
782   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
783   isend[3] = info->memory;  isend[4] = info->mallocs;
784   if (flag == MAT_LOCAL) {
785     info->nz_used      = isend[0];
786     info->nz_allocated = isend[1];
787     info->nz_unneeded  = isend[2];
788     info->memory       = isend[3];
789     info->mallocs      = isend[4];
790   } else if (flag == MAT_GLOBAL_MAX) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   } else if (flag == MAT_GLOBAL_SUM) {
798     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
799     info->nz_used      = irecv[0];
800     info->nz_allocated = irecv[1];
801     info->nz_unneeded  = irecv[2];
802     info->memory       = irecv[3];
803     info->mallocs      = irecv[4];
804   }
805   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
806   info->fill_ratio_needed = 0;
807   info->factor_mallocs    = 0;
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatSetOption_MPIDense"
813 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
814 {
815   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
816   PetscErrorCode ierr;
817 
818   PetscFunctionBegin;
819   switch (op) {
820   case MAT_NO_NEW_NONZERO_LOCATIONS:
821   case MAT_YES_NEW_NONZERO_LOCATIONS:
822   case MAT_NEW_NONZERO_LOCATION_ERR:
823   case MAT_NEW_NONZERO_ALLOCATION_ERR:
824   case MAT_COLUMNS_SORTED:
825   case MAT_COLUMNS_UNSORTED:
826     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
827     break;
828   case MAT_ROW_ORIENTED:
829     a->roworiented = PETSC_TRUE;
830     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
831     break;
832   case MAT_ROWS_SORTED:
833   case MAT_ROWS_UNSORTED:
834   case MAT_YES_NEW_DIAGONALS:
835   case MAT_USE_HASH_TABLE:
836     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
837     break;
838   case MAT_COLUMN_ORIENTED:
839     a->roworiented = PETSC_FALSE;
840     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
841     break;
842   case MAT_IGNORE_OFF_PROC_ENTRIES:
843     a->donotstash = PETSC_TRUE;
844     break;
845   case MAT_NO_NEW_DIAGONALS:
846     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
847   case MAT_SYMMETRIC:
848   case MAT_STRUCTURALLY_SYMMETRIC:
849   case MAT_NOT_SYMMETRIC:
850   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
851   case MAT_HERMITIAN:
852   case MAT_NOT_HERMITIAN:
853   case MAT_SYMMETRY_ETERNAL:
854   case MAT_NOT_SYMMETRY_ETERNAL:
855     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
856     break;
857   default:
858     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatDiagonalScale_MPIDense"
866 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
867 {
868   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
869   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
870   PetscScalar    *l,*r,x,*v;
871   PetscErrorCode ierr;
872   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
873 
874   PetscFunctionBegin;
875   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
876   if (ll) {
877     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
878     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
879     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
880     for (i=0; i<m; i++) {
881       x = l[i];
882       v = mat->v + i;
883       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
884     }
885     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
886     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
887   }
888   if (rr) {
889     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
890     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
891     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
894     for (i=0; i<n; i++) {
895       x = r[i];
896       v = mat->v + i*m;
897       for (j=0; j<m; j++) { (*v++) *= x;}
898     }
899     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
900     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
901   }
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatNorm_MPIDense"
907 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
908 {
909   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
910   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
911   PetscErrorCode ierr;
912   PetscInt       i,j;
913   PetscReal      sum = 0.0;
914   PetscScalar    *v = mat->v;
915 
916   PetscFunctionBegin;
917   if (mdn->size == 1) {
918     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
919   } else {
920     if (type == NORM_FROBENIUS) {
921       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
922 #if defined(PETSC_USE_COMPLEX)
923         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
924 #else
925         sum += (*v)*(*v); v++;
926 #endif
927       }
928       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
929       *nrm = sqrt(*nrm);
930       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
931     } else if (type == NORM_1) {
932       PetscReal *tmp,*tmp2;
933       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
934       tmp2 = tmp + A->cmap.N;
935       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
936       *nrm = 0.0;
937       v = mat->v;
938       for (j=0; j<mdn->A->cmap.n; j++) {
939         for (i=0; i<mdn->A->rmap.n; i++) {
940           tmp[j] += PetscAbsScalar(*v);  v++;
941         }
942       }
943       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);CHKERRQ(ierr);
944       for (j=0; j<A->cmap.N; j++) {
945         if (tmp2[j] > *nrm) *nrm = tmp2[j];
946       }
947       ierr = PetscFree(tmp);CHKERRQ(ierr);
948       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
949     } else if (type == NORM_INFINITY) { /* max row norm */
950       PetscReal ntemp;
951       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
952       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);CHKERRQ(ierr);
953     } else {
954       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
955     }
956   }
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatTranspose_MPIDense"
962 PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
963 {
964   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
965   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
966   Mat            B;
967   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
968   PetscErrorCode ierr;
969   PetscInt       j,i;
970   PetscScalar    *v;
971 
972   PetscFunctionBegin;
973   if (!matout && M != N) {
974     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
975   }
976   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
977   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
978   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
979   ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
980 
981   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
982   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
983   for (i=0; i<m; i++) rwork[i] = rstart + i;
984   for (j=0; j<n; j++) {
985     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
986     v   += m;
987   }
988   ierr = PetscFree(rwork);CHKERRQ(ierr);
989   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991   if (matout) {
992     *matout = B;
993   } else {
994     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
995   }
996   PetscFunctionReturn(0);
997 }
998 
999 #include "petscblaslapack.h"
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatScale_MPIDense"
1002 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
1003 {
1004   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
1005   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
1006   PetscScalar    oalpha = alpha;
1007   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   BLASscal_(&nz,&oalpha,a->v,&one);
1012   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1020 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1021 {
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1031 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1032 {
1033   PetscErrorCode ierr;
1034   PetscInt       m=A->rmap.n,n=B->cmap.n;
1035   Mat            Cmat;
1036 
1037   PetscFunctionBegin;
1038   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1039   ierr = MatCreate(B->comm,&Cmat);CHKERRQ(ierr);
1040   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1041   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1042   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1043   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1044   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1045   *C = Cmat;
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 /* -------------------------------------------------------------------*/
1050 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1051        MatGetRow_MPIDense,
1052        MatRestoreRow_MPIDense,
1053        MatMult_MPIDense,
1054 /* 4*/ MatMultAdd_MPIDense,
1055        MatMultTranspose_MPIDense,
1056        MatMultTransposeAdd_MPIDense,
1057        0,
1058        0,
1059        0,
1060 /*10*/ 0,
1061        0,
1062        0,
1063        0,
1064        MatTranspose_MPIDense,
1065 /*15*/ MatGetInfo_MPIDense,
1066        MatEqual_MPIDense,
1067        MatGetDiagonal_MPIDense,
1068        MatDiagonalScale_MPIDense,
1069        MatNorm_MPIDense,
1070 /*20*/ MatAssemblyBegin_MPIDense,
1071        MatAssemblyEnd_MPIDense,
1072        0,
1073        MatSetOption_MPIDense,
1074        MatZeroEntries_MPIDense,
1075 /*25*/ MatZeroRows_MPIDense,
1076        MatLUFactorSymbolic_MPIDense,
1077        0,
1078        MatCholeskyFactorSymbolic_MPIDense,
1079        0,
1080 /*30*/ MatSetUpPreallocation_MPIDense,
1081        0,
1082        0,
1083        MatGetArray_MPIDense,
1084        MatRestoreArray_MPIDense,
1085 /*35*/ MatDuplicate_MPIDense,
1086        0,
1087        0,
1088        0,
1089        0,
1090 /*40*/ 0,
1091        MatGetSubMatrices_MPIDense,
1092        0,
1093        MatGetValues_MPIDense,
1094        0,
1095 /*45*/ 0,
1096        MatScale_MPIDense,
1097        0,
1098        0,
1099        0,
1100 /*50*/ 0,
1101        0,
1102        0,
1103        0,
1104        0,
1105 /*55*/ 0,
1106        0,
1107        0,
1108        0,
1109        0,
1110 /*60*/ MatGetSubMatrix_MPIDense,
1111        MatDestroy_MPIDense,
1112        MatView_MPIDense,
1113        0,
1114        0,
1115 /*65*/ 0,
1116        0,
1117        0,
1118        0,
1119        0,
1120 /*70*/ 0,
1121        0,
1122        0,
1123        0,
1124        0,
1125 /*75*/ 0,
1126        0,
1127        0,
1128        0,
1129        0,
1130 /*80*/ 0,
1131        0,
1132        0,
1133        0,
1134 /*84*/ MatLoad_MPIDense,
1135        0,
1136        0,
1137        0,
1138        0,
1139        0,
1140 /*90*/ 0,
1141        MatMatMultSymbolic_MPIDense_MPIDense,
1142        0,
1143        0,
1144        0,
1145 /*95*/ 0,
1146        0,
1147        0,
1148        0};
1149 
1150 EXTERN_C_BEGIN
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1153 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1154 {
1155   Mat_MPIDense   *a;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   mat->preallocated = PETSC_TRUE;
1160   /* Note:  for now, when a data array is provided, we assume the user has correctly
1161    allocated the local dense storage space; error checking should be added. */
1162 
1163   a    = (Mat_MPIDense*)mat->data;
1164   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1165   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1166   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1167   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1168   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 EXTERN_C_END
1172 
1173 /*MC
1174    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1175 
1176    Options Database Keys:
1177 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1178 
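  Example usage (a minimal sketch; comm, M and N are assumed to be supplied by the caller):
.vb
  Mat A;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
.ve
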
1179   Level: beginner
1180 
1181 .seealso: MatCreateMPIDense
1182 M*/
1183 
1184 EXTERN_C_BEGIN
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatCreate_MPIDense"
1187 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1188 {
1189   Mat_MPIDense   *a;
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1194   mat->data         = (void*)a;
1195   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1196   mat->factor       = 0;
1197   mat->mapping      = 0;
1198 
1199   a->factor       = 0;
1200   mat->insertmode = NOT_SET_VALUES;
1201   ierr = MPI_Comm_rank(mat->comm,&a->rank);CHKERRQ(ierr);
1202   ierr = MPI_Comm_size(mat->comm,&a->size);CHKERRQ(ierr);
1203 
1204   mat->rmap.bs = mat->cmap.bs = 1;
1205   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1206   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1207   a->nvec = mat->cmap.n;
1208 
1209   /* build cache for off array entries formed */
1210   a->donotstash = PETSC_FALSE;
1211   ierr = MatStashCreate_Private(mat->comm,1,&mat->stash);CHKERRQ(ierr);
1212 
1213   /* stuff used for matrix vector multiply */
1214   a->lvec        = 0;
1215   a->Mvctx       = 0;
1216   a->roworiented = PETSC_TRUE;
1217 
1218   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1219                                      "MatGetDiagonalBlock_MPIDense",
1220                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1222                                      "MatMPIDenseSetPreallocation_MPIDense",
1223                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1225                                      "MatMatMult_MPIAIJ_MPIDense",
1226                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1228                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1229                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1231                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1232                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1233   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 EXTERN_C_END
1237 
1238 /*MC
1239    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1240 
1241    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1242    and MATMPIDENSE otherwise.
1243 
1244    Options Database Keys:
1245 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1246 
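   Example usage (a minimal sketch; the same code works on one process or many, with comm,
   M and N assumed to be supplied by the caller):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);  /* becomes MATSEQDENSE on one process, MATMPIDENSE otherwise */
.ve
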
1247   Level: beginner
1248 
1249 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1250 M*/
1251 
1252 EXTERN_C_BEGIN
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatCreate_Dense"
1255 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1256 {
1257   PetscErrorCode ierr;
1258   PetscMPIInt    size;
1259 
1260   PetscFunctionBegin;
1261   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1262   if (size == 1) {
1263     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1264   } else {
1265     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 EXTERN_C_END
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1273 /*@C
1274    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1275 
1276    Not collective
1277 
1278    Input Parameters:
1279 +  A - the matrix
1280 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1281    to control all matrix memory allocation.
1282 
1283    Notes:
1284    The dense format is fully compatible with standard Fortran 77
1285    storage by columns.
1286 
1287    The data input variable is intended primarily for Fortran programmers
1288    who wish to allocate their own matrix memory space.  Most users should
1289    set data=PETSC_NULL.
1290 
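   Example usage (a minimal sketch letting PETSc allocate the storage; comm and the sizes
   m, n, M, N are assumed to be supplied by the caller):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
   ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
.ve
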
1291    Level: intermediate
1292 
1293 .keywords: matrix,dense, parallel
1294 
1295 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1296 @*/
1297 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1298 {
1299   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1300 
1301   PetscFunctionBegin;
1302   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1303   if (f) {
1304     ierr = (*f)(mat,data);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatCreateMPIDense"
1311 /*@C
1312    MatCreateMPIDense - Creates a parallel matrix in dense format.
1313 
1314    Collective on MPI_Comm
1315 
1316    Input Parameters:
1317 +  comm - MPI communicator
1318 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1319 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1320 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1321 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1322 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1323    to control all matrix memory allocation.
1324 
1325    Output Parameter:
1326 .  A - the matrix
1327 
1328    Notes:
1329    The dense format is fully compatible with standard Fortran 77
1330    storage by columns.
1331 
1332    The data input variable is intended primarily for Fortran programmers
1333    who wish to allocate their own matrix memory space.  Most users should
1334    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1335 
1336    The user MUST specify either the local or global matrix dimensions
1337    (possibly both).
1338 
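   Example usage (a minimal sketch; M and N are assumed to be supplied by the caller and
   PETSc manages all of the matrix storage):
.vb
   Mat A;
   ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... insert entries with MatSetValues(), then ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
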
1339    Level: intermediate
1340 
1341 .keywords: matrix,dense, parallel
1342 
1343 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1344 @*/
1345 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1346 {
1347   PetscErrorCode ierr;
1348   PetscMPIInt    size;
1349 
1350   PetscFunctionBegin;
1351   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1352   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1353   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1354   if (size > 1) {
1355     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1356     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1357   } else {
1358     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1359     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatDuplicate_MPIDense"
1366 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1367 {
1368   Mat            mat;
1369   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   *newmat       = 0;
1374   ierr = MatCreate(A->comm,&mat);CHKERRQ(ierr);
1375   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1376   ierr = MatSetType(mat,A->type_name);CHKERRQ(ierr);
1377   a                 = (Mat_MPIDense*)mat->data;
1378   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1379   mat->factor       = A->factor;
1380   mat->assembled    = PETSC_TRUE;
1381   mat->preallocated = PETSC_TRUE;
1382 
1383   mat->rmap.rstart     = A->rmap.rstart;
1384   mat->rmap.rend       = A->rmap.rend;
1385   a->size              = oldmat->size;
1386   a->rank              = oldmat->rank;
1387   mat->insertmode      = NOT_SET_VALUES;
1388   a->nvec              = oldmat->nvec;
1389   a->donotstash        = oldmat->donotstash;
1390 
1391   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1392   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1393   ierr = MatStashCreate_Private(A->comm,1,&mat->stash);CHKERRQ(ierr);
1394 
1395   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1396   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1397   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1398   *newmat = mat;
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #include "petscsys.h"
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1406 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1407 {
1408   PetscErrorCode ierr;
1409   PetscMPIInt    rank,size;
1410   PetscInt       *rowners,i,m,nz,j;
1411   PetscScalar    *array,*vals,*vals_ptr;
1412   MPI_Status     status;
1413 
1414   PetscFunctionBegin;
1415   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1416   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1417 
1418   /* determine ownership of all rows */
1419   m          = M/size + ((M % size) > rank);
1420   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1421   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1422   rowners[0] = 0;
1423   for (i=2; i<=size; i++) {
1424     rowners[i] += rowners[i-1];
1425   }
1426 
1427   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1428   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1429   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1430   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1431   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1432 
1433   if (!rank) {
1434     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1435 
1436     /* read in my part of the matrix numerical values  */
1437     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1438 
1439     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1440     vals_ptr = vals;
1441     for (i=0; i<m; i++) {
1442       for (j=0; j<N; j++) {
1443         array[i + j*m] = *vals_ptr++;
1444       }
1445     }
1446 
1447     /* read in the other processors' parts and ship them out */
1448     for (i=1; i<size; i++) {
1449       nz   = (rowners[i+1] - rowners[i])*N;
1450       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1451       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);CHKERRQ(ierr);
1452     }
1453   } else {
1454     /* receive numeric values */
1455     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1456 
1457     /* receive message of values*/
1458     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);CHKERRQ(ierr);
1459 
1460     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1461     vals_ptr = vals;
1462     for (i=0; i<m; i++) {
1463       for (j=0; j<N; j++) {
1464         array[i + j*m] = *vals_ptr++;
1465       }
1466     }
1467   }
1468   ierr = PetscFree(rowners);CHKERRQ(ierr);
1469   ierr = PetscFree(vals);CHKERRQ(ierr);
1470   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1471   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatLoad_MPIDense"
1477 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1478 {
1479   Mat            A;
1480   PetscScalar    *vals,*svals;
1481   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1482   MPI_Status     status;
1483   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1484   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1485   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1486   PetscInt       i,nz,j,rstart,rend;
1487   int            fd;
1488   PetscErrorCode ierr;
1489 
1490   PetscFunctionBegin;
1491   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1492   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1493   if (!rank) {
1494     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1495     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1496     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1497   }
1498 
1499   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1500   M = header[1]; N = header[2]; nz = header[3];
1501 
1502   /*
1503        Handle case where matrix is stored on disk as a dense matrix
1504   */
1505   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1506     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1507     PetscFunctionReturn(0);
1508   }
1509 
1510   /* determine ownership of all rows */
1511   m          = M/size + ((M % size) > rank);
1512   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1513   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1514   rowners[0] = 0;
1515   for (i=2; i<=size; i++) {
1516     rowners[i] += rowners[i-1];
1517   }
1518   rstart = rowners[rank];
1519   rend   = rowners[rank+1];
1520 
1521   /* distribute row lengths to all processors */
1522   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1523   offlens = ourlens + (rend-rstart);
1524   if (!rank) {
1525     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1526     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1527     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1528     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1529     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1530     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1531   } else {
1532     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1533   }
1534 
1535   if (!rank) {
1536     /* calculate the number of nonzeros on each processor */
1537     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1538     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1539     for (i=0; i<size; i++) {
1540       for (j=rowners[i]; j< rowners[i+1]; j++) {
1541         procsnz[i] += rowlengths[j];
1542       }
1543     }
1544     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1545 
1546     /* determine max buffer needed and allocate it */
1547     maxnz = 0;
1548     for (i=0; i<size; i++) {
1549       maxnz = PetscMax(maxnz,procsnz[i]);
1550     }
1551     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1552 
1553     /* read in my part of the matrix column indices  */
1554     nz = procsnz[0];
1555     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1556     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1557 
1558     /* read in everyone else's part and ship it off */
1559     for (i=1; i<size; i++) {
1560       nz   = procsnz[i];
1561       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1562       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1563     }
1564     ierr = PetscFree(cols);CHKERRQ(ierr);
1565   } else {
1566     /* determine buffer space needed for message */
1567     nz = 0;
1568     for (i=0; i<m; i++) {
1569       nz += ourlens[i];
1570     }
1571     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1572 
1573     /* receive message of column indices*/
1574     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1575     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1576     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1577   }
1578 
1579   /* loop over local rows, determining number of off diagonal entries */
1580   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1581   jj = 0;
1582   for (i=0; i<m; i++) {
1583     for (j=0; j<ourlens[i]; j++) {
1584       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1585       jj++;
1586     }
1587   }
1588 
1589   /* create our matrix */
1590   for (i=0; i<m; i++) {
1591     ourlens[i] -= offlens[i];
1592   }
1593   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1594   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1595   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1596   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1597   A = *newmat;
1598   for (i=0; i<m; i++) {
1599     ourlens[i] += offlens[i];
1600   }
1601 
1602   if (!rank) {
1603     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1604 
1605     /* read in my part of the matrix numerical values  */
1606     nz = procsnz[0];
1607     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1608 
1609     /* insert into matrix */
1610     jj      = rstart;
1611     smycols = mycols;
1612     svals   = vals;
1613     for (i=0; i<m; i++) {
1614       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1615       smycols += ourlens[i];
1616       svals   += ourlens[i];
1617       jj++;
1618     }
1619 
1620     /* read in the other processors' parts and ship them out */
1621     for (i=1; i<size; i++) {
1622       nz   = procsnz[i];
1623       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1624       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1625     }
1626     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1627   } else {
1628     /* receive numeric values */
1629     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1630 
1631     /* receive message of values*/
1632     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1633     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1634     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1635 
1636     /* insert into matrix */
1637     jj      = rstart;
1638     smycols = mycols;
1639     svals   = vals;
1640     for (i=0; i<m; i++) {
1641       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1642       smycols += ourlens[i];
1643       svals   += ourlens[i];
1644       jj++;
1645     }
1646   }
1647   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1648   ierr = PetscFree(vals);CHKERRQ(ierr);
1649   ierr = PetscFree(mycols);CHKERRQ(ierr);
1650   ierr = PetscFree(rowners);CHKERRQ(ierr);
1651 
1652   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1653   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatEqual_MPIDense"
1659 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1660 {
1661   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1662   Mat            a,b;
1663   PetscTruth     flg;
1664   PetscErrorCode ierr;
1665 
1666   PetscFunctionBegin;
1667   a = matA->A;
1668   b = matB->A;
1669   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1670   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 
1675 
1676