xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 7842634f46ad2ee292fa135afd07ef040c80768f)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 EXTERN_C_BEGIN
13 #include "PLA.h"
14 EXTERN_C_END
15 
16 typedef struct {
17   PLA_Obj        A,pivots;
18   PLA_Template   templ;
19   MPI_Datatype   datatype;
20   PetscInt       nb,rstart;
21   VecScatter     ctx;
22   IS             is_pla,is_petsc;
23   PetscTruth     pla_solved;
24   MatStructure   mstruct;
25 } Mat_Plapack;
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatDenseGetLocalMatrix"
30 /*@
31 
32       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
33               matrix that stores the locally owned part of the operator. For a sequential matrix it returns the matrix itself.
34 
35     Input Parameter:
36 .      A - the Seq or MPI dense matrix
37 
38     Output Parameter:
39 .      B - the inner matrix
40 
41     Level: intermediate
42 
43 @*/
44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45 {
46   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
47   PetscErrorCode ierr;
48   PetscTruth     flg;
49 
50   PetscFunctionBegin;
51   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
52   if (flg) {
53     *B = mat->A;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
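
/*
   Usage sketch (illustration only, not part of this source): a caller can combine
   MatDenseGetLocalMatrix() with MatGetArray() to reach the raw local storage of a
   parallel dense matrix.  The matrix A below is assumed to be an already assembled
   MATMPIDENSE (or MATSEQDENSE) matrix created elsewhere.

     Mat         Aloc;
     PetscScalar *v;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
         ... v[] holds the locally owned rows in column-major order ...
     ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);

   Aloc is a reference to the inner sequential matrix, so the caller should not destroy it.
*/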
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatGetRow_MPIDense"
62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63 {
64   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
65   PetscErrorCode ierr;
66   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
67 
68   PetscFunctionBegin;
69   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
70   lrow = row - rstart;
71   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatRestoreRow_MPIDense"
77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
83   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
84   PetscFunctionReturn(0);
85 }
86 
87 EXTERN_C_BEGIN
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
91 {
92   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
93   PetscErrorCode ierr;
94   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
95   PetscScalar    *array;
96   MPI_Comm       comm;
97 
98   PetscFunctionBegin;
99   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
100 
101   /* The reuse aspect is not implemented efficiently */
102   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
103 
104   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
105   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
106   ierr = MatCreate(comm,B);CHKERRQ(ierr);
107   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
108   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
109   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
110   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
111   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
113 
114   *iscopy = PETSC_TRUE;
115   PetscFunctionReturn(0);
116 }
117 EXTERN_C_END
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatSetValues_MPIDense"
121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122 {
123   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
124   PetscErrorCode ierr;
125   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126   PetscTruth     roworiented = A->roworiented;
127 
128   PetscFunctionBegin;
129   if (v) PetscValidScalarPointer(v,6);
130   for (i=0; i<m; i++) {
131     if (idxm[i] < 0) continue;
132     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
133     if (idxm[i] >= rstart && idxm[i] < rend) {
134       row = idxm[i] - rstart;
135       if (roworiented) {
136         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
137       } else {
138         for (j=0; j<n; j++) {
139           if (idxn[j] < 0) continue;
140           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
141           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
142         }
143       }
144     } else {
145       if (!A->donotstash) {
146         if (roworiented) {
147           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
148         } else {
149           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
150         }
151       }
152     }
153   }
154   PetscFunctionReturn(0);
155 }
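
/*
   Usage sketch (illustration only): values are set with global indices; a row owned by
   another process is routed through the stash handled above and delivered during assembly.
   The global size of 100 below is hypothetical.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       i,rstart,rend,col[2];
     PetscScalar    val[2];

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       col[0] = i; col[1] = (i+1) % 100;
       val[0] = 2.0; val[1] = -1.0;
       ierr = MatSetValues(A,1,&i,2,col,val,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/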
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetValues_MPIDense"
159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160 {
161   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
164 
165   PetscFunctionBegin;
166   for (i=0; i<m; i++) {
167     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169     if (idxm[i] >= rstart && idxm[i] < rend) {
170       row = idxm[i] - rstart;
171       for (j=0; j<n; j++) {
172         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
174         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
175       }
176     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
177   }
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatGetArray_MPIDense"
183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
184 {
185   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
186   PetscErrorCode ierr;
187 
188   PetscFunctionBegin;
189   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
190   PetscFunctionReturn(0);
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
196 {
197   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
198   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
199   PetscErrorCode ierr;
200   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
201   const PetscInt *irow,*icol;
202   PetscScalar    *av,*bv,*v = lmat->v;
203   Mat            newmat;
204   IS             iscol_local;
205 
206   PetscFunctionBegin;
207   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
208   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
209   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
210   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
211   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
212   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
213 
214   /* No parallel redistribution is currently supported! Each index set should really be
215      checked to confirm that it is OK.  ... Currently only submatrices with the same row
216      partitioning as the original matrix are supported! */
217 
218   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
219   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
220 
221   /* Check submatrix call */
222   if (scall == MAT_REUSE_MATRIX) {
223     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
224     /* Really need to test rows and column sizes! */
225     newmat = *B;
226   } else {
227     /* Create and fill new matrix */
228     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
229     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
230     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
231     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
232   }
233 
234   /* Now extract the data pointers and do the copy, column at a time */
235   newmatd = (Mat_MPIDense*)newmat->data;
236   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
237 
238   for (i=0; i<Ncols; i++) {
239     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
240     for (j=0; j<nrows; j++) {
241       *bv++ = av[irow[j] - rstart];
242     }
243   }
244 
245   /* Assemble the matrices so that the correct flags are set */
246   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248 
249   /* Free work space */
250   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
251   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
252   *B = newmat;
253   PetscFunctionReturn(0);
254 }
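
/*
   Usage sketch (illustration only): since no redistribution is done above, the row index
   set must select only locally owned rows and preserve the original row layout.  A
   hypothetical call keeping all local rows and a leading block of columns could look like

     IS  isrow,iscol;
     Mat B;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ierr = ISDestroy(isrow);CHKERRQ(ierr);
     ierr = ISDestroy(iscol);CHKERRQ(ierr);

   where rstart, rend, nloc and first are PetscInt variables assumed to be declared and
   set by the caller (the column index sets are gathered onto every process internally).
*/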
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatRestoreArray_MPIDense"
258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
259 {
260   PetscFunctionBegin;
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
267 {
268   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
269   MPI_Comm       comm = ((PetscObject)mat)->comm;
270   PetscErrorCode ierr;
271   PetscInt       nstash,reallocs;
272   InsertMode     addv;
273 
274   PetscFunctionBegin;
275   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
276   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
277   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
278   mat->insertmode = addv; /* in case this processor had no cache */
279 
280   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
281   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
282   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
289 {
290   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
291   PetscErrorCode  ierr;
292   PetscInt        i,*row,*col,flg,j,rstart,ncols;
293   PetscMPIInt     n;
294   PetscScalar     *val;
295   InsertMode      addv=mat->insertmode;
296 
297   PetscFunctionBegin;
298   /*  wait on receives */
299   while (1) {
300     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
301     if (!flg) break;
302 
303     for (i=0; i<n;) {
304       /* Now identify the consecutive vals belonging to the same row */
305       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
306       if (j < n) ncols = j-i;
307       else       ncols = n-i;
308       /* Now assemble all these values with a single function call */
309       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
310       i = j;
311     }
312   }
313   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
314 
315   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
316   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
317 
318   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
319     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatZeroEntries_MPIDense"
326 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
327 {
328   PetscErrorCode ierr;
329   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
330 
331   PetscFunctionBegin;
332   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 /* The code does not set the diagonal entries correctly unless the
337    matrix is square and the column and row ownerships are identical.
338    This is a BUG. The only way to fix it seems to be to access
339    the local matrix mdn->A directly rather than going through the
340    MatZeroRows() routine.
341 */
342 #undef __FUNCT__
343 #define __FUNCT__ "MatZeroRows_MPIDense"
344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
345 {
346   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
347   PetscErrorCode ierr;
348   PetscInt       i,*owners = A->rmap->range;
349   PetscInt       *nprocs,j,idx,nsends;
350   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
351   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
352   PetscInt       *lens,*lrows,*values;
353   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
354   MPI_Comm       comm = ((PetscObject)A)->comm;
355   MPI_Request    *send_waits,*recv_waits;
356   MPI_Status     recv_status,*send_status;
357   PetscTruth     found;
358 
359   PetscFunctionBegin;
360   /*  first count number of contributors to each processor */
361   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
362   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
363   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
364   for (i=0; i<N; i++) {
365     idx = rows[i];
366     found = PETSC_FALSE;
367     for (j=0; j<size; j++) {
368       if (idx >= owners[j] && idx < owners[j+1]) {
369         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
370       }
371     }
372     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
373   }
374   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
375 
376   /* inform other processors of number of messages and max length*/
377   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
378 
379   /* post receives:   */
380   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
381   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
382   for (i=0; i<nrecvs; i++) {
383     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
384   }
385 
386   /* do sends:
387       1) starts[i] gives the starting index in svalues for stuff going to
388          the ith processor
389   */
390   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
391   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
392   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
393   starts[0]  = 0;
394   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
395   for (i=0; i<N; i++) {
396     svalues[starts[owner[i]]++] = rows[i];
397   }
398 
399   starts[0] = 0;
400   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
401   count = 0;
402   for (i=0; i<size; i++) {
403     if (nprocs[2*i+1]) {
404       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
405     }
406   }
407   ierr = PetscFree(starts);CHKERRQ(ierr);
408 
409   base = owners[rank];
410 
411   /*  wait on receives */
412   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
413   count  = nrecvs;
414   slen   = 0;
415   while (count) {
416     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
417     /* unpack receives into our local space */
418     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
419     source[imdex]  = recv_status.MPI_SOURCE;
420     lens[imdex]    = n;
421     slen += n;
422     count--;
423   }
424   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
425 
426   /* unpack the received global row indices into local row numbers */
427   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
428   count = 0;
429   for (i=0; i<nrecvs; i++) {
430     values = rvalues + i*nmax;
431     for (j=0; j<lens[i]; j++) {
432       lrows[count++] = values[j] - base;
433     }
434   }
435   ierr = PetscFree(rvalues);CHKERRQ(ierr);
436   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
437   ierr = PetscFree(owner);CHKERRQ(ierr);
438   ierr = PetscFree(nprocs);CHKERRQ(ierr);
439 
440   /* actually zap the local rows */
441   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
442   ierr = PetscFree(lrows);CHKERRQ(ierr);
443 
444   /* wait on sends */
445   if (nsends) {
446     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
447     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
448     ierr = PetscFree(send_status);CHKERRQ(ierr);
449   }
450   ierr = PetscFree(send_waits);CHKERRQ(ierr);
451   ierr = PetscFree(svalues);CHKERRQ(ierr);
452 
453   PetscFunctionReturn(0);
454 }
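
/*
   Usage sketch (illustration only): for an assembled matrix A, rows are specified with
   global indices and may be owned by any process; the communication above routes each
   index to its owner.  Note the caveat before this routine: the diagonal value diag is
   only placed correctly when the matrix is square with identical row and column ownership.

     PetscInt rows[2];

     rows[0] = 0; rows[1] = 7;                  (hypothetical global row numbers)
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/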
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatMult_MPIDense"
458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
459 {
460   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
465   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
467   PetscFunctionReturn(0);
468 }
469 
470 #undef __FUNCT__
471 #define __FUNCT__ "MatMultAdd_MPIDense"
472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
473 {
474   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatMultTranspose_MPIDense"
486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
487 {
488   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
489   PetscErrorCode ierr;
490   PetscScalar    zero = 0.0;
491 
492   PetscFunctionBegin;
493   ierr = VecSet(yy,zero);CHKERRQ(ierr);
494   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
495   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
496   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
503 {
504   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
509   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
510   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
511   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNCT__
516 #define __FUNCT__ "MatGetDiagonal_MPIDense"
517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
518 {
519   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
520   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
521   PetscErrorCode ierr;
522   PetscInt       len,i,n,m = A->rmap->n,radd;
523   PetscScalar    *x,zero = 0.0;
524 
525   PetscFunctionBegin;
526   ierr = VecSet(v,zero);CHKERRQ(ierr);
527   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
528   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
529   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
530   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
531   radd = A->rmap->rstart*m;
532   for (i=0; i<len; i++) {
533     x[i] = aloc->v[radd + i*m + i];
534   }
535   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatDestroy_MPIDense"
541 PetscErrorCode MatDestroy_MPIDense(Mat mat)
542 {
543   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
544   PetscErrorCode ierr;
545 #if defined(PETSC_HAVE_PLAPACK)
546   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
547 #endif
548 
549   PetscFunctionBegin;
550 
551 #if defined(PETSC_USE_LOG)
552   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
553 #endif
554   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
555   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
556   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
557   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
558 #if defined(PETSC_HAVE_PLAPACK)
559   if (lu) {
560     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
561     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
562     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
563 
564     if (lu->is_pla) {
565       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
566       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
567       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
568     }
569   }
570 #endif
571 
572   ierr = PetscFree(mdn);CHKERRQ(ierr);
573   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_MPIDense_Binary"
584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585 {
586   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
587   PetscErrorCode    ierr;
588   PetscViewerFormat format;
589   int               fd;
590   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
591   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
592   PetscScalar       *work,*v,*vv;
593   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
594   MPI_Status        status;
595 
596   PetscFunctionBegin;
597   if (mdn->size == 1) {
598     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
599   } else {
600     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
601     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
602     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
603 
604     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
605     if (format == PETSC_VIEWER_NATIVE) {
606 
607       if (!rank) {
608         /* store the matrix as a dense matrix */
609         header[0] = MAT_FILE_CLASSID;
610         header[1] = mat->rmap->N;
611         header[2] = N;
612         header[3] = MATRIX_BINARY_FORMAT_DENSE;
613         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
614 
615         /* get largest work array needed for transposing array */
616         mmax = mat->rmap->n;
617         for (i=1; i<size; i++) {
618           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
619         }
620         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
621 
622         /* write out local array, by rows */
623         m    = mat->rmap->n;
624         v    = a->v;
625         for (j=0; j<N; j++) {
626           for (i=0; i<m; i++) {
627             work[j + i*N] = *v++;
628           }
629         }
630         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
631         /* get largest work array to receive messages from other processes, excludes process zero */
632         mmax = 0;
633         for (i=1; i<size; i++) {
634           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
635         }
636         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
637         for(k = 1; k < size; k++) {
638           v    = vv;
639           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
640           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
641 
642           for(j = 0; j < N; j++) {
643             for(i = 0; i < m; i++) {
644               work[j + i*N] = *v++;
645             }
646           }
647           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
648         }
649         ierr = PetscFree(work);CHKERRQ(ierr);
650         ierr = PetscFree(vv);CHKERRQ(ierr);
651       } else {
652         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
653       }
654     } else {
655       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
656     }
657   }
658   PetscFunctionReturn(0);
659 }
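
/*
   Usage sketch (illustration only): as the error message above indicates, a parallel
   dense matrix can only be written to a binary viewer in the native format.  A
   hypothetical save of an assembled MATMPIDENSE matrix mat to the file "dense.dat"
   could look like

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/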
660 
661 #undef __FUNCT__
662 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
664 {
665   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
666   PetscErrorCode        ierr;
667   PetscMPIInt           size = mdn->size,rank = mdn->rank;
668   const PetscViewerType vtype;
669   PetscTruth            iascii,isdraw;
670   PetscViewer           sviewer;
671   PetscViewerFormat     format;
672 #if defined(PETSC_HAVE_PLAPACK)
673   Mat_Plapack           *lu;
674 #endif
675 
676   PetscFunctionBegin;
677   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
678   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
679   if (iascii) {
680     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
681     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
682     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
683       MatInfo info;
684       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
685       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
686                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
687       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
688 #if defined(PETSC_HAVE_PLAPACK)
689       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
690       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
691       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
692       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
693       if (mat->factortype){
694         lu=(Mat_Plapack*)(mat->spptr);
695         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
696       }
697 #else
698       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
699 #endif
700       PetscFunctionReturn(0);
701     } else if (format == PETSC_VIEWER_ASCII_INFO) {
702       PetscFunctionReturn(0);
703     }
704   } else if (isdraw) {
705     PetscDraw  draw;
706     PetscTruth isnull;
707 
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
710     if (isnull) PetscFunctionReturn(0);
711   }
712 
713   if (size == 1) {
714     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto first processor. */
717     Mat         A;
718     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
719     PetscInt    *cols;
720     PetscScalar *vals;
721 
722     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
723     if (!rank) {
724       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
725     } else {
726       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
727     }
728     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)mat)->type_name here is probably acceptable. */
729     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
730     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
731     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
732 
733     /* Copy the matrix ... This isn't the most efficient means,
734        but it's quick for now */
735     A->insertmode = INSERT_VALUES;
736     row = mat->rmap->rstart; m = mdn->A->rmap->n;
737     for (i=0; i<m; i++) {
738       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
739       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
740       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
741       row++;
742     }
743 
744     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
746     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
747     if (!rank) {
748       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
749     }
750     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
751     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
752     ierr = MatDestroy(A);CHKERRQ(ierr);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatView_MPIDense"
759 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
760 {
761   PetscErrorCode ierr;
762   PetscTruth     iascii,isbinary,isdraw,issocket;
763 
764   PetscFunctionBegin;
765 
766   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
767   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
768   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
769   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
770 
771   if (iascii || issocket || isdraw) {
772     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
773   } else if (isbinary) {
774     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
775   } else {
776     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
777   }
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatGetInfo_MPIDense"
783 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
784 {
785   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
786   Mat            mdn = mat->A;
787   PetscErrorCode ierr;
788   PetscReal      isend[5],irecv[5];
789 
790   PetscFunctionBegin;
791   info->block_size     = 1.0;
792   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
793   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
794   isend[3] = info->memory;  isend[4] = info->mallocs;
795   if (flag == MAT_LOCAL) {
796     info->nz_used      = isend[0];
797     info->nz_allocated = isend[1];
798     info->nz_unneeded  = isend[2];
799     info->memory       = isend[3];
800     info->mallocs      = isend[4];
801   } else if (flag == MAT_GLOBAL_MAX) {
802     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
803     info->nz_used      = irecv[0];
804     info->nz_allocated = irecv[1];
805     info->nz_unneeded  = irecv[2];
806     info->memory       = irecv[3];
807     info->mallocs      = irecv[4];
808   } else if (flag == MAT_GLOBAL_SUM) {
809     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
810     info->nz_used      = irecv[0];
811     info->nz_allocated = irecv[1];
812     info->nz_unneeded  = irecv[2];
813     info->memory       = irecv[3];
814     info->mallocs      = irecv[4];
815   }
816   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
817   info->fill_ratio_needed = 0;
818   info->factor_mallocs    = 0;
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatSetOption_MPIDense"
824 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
825 {
826   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   switch (op) {
831   case MAT_NEW_NONZERO_LOCATIONS:
832   case MAT_NEW_NONZERO_LOCATION_ERR:
833   case MAT_NEW_NONZERO_ALLOCATION_ERR:
834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
835     break;
836   case MAT_ROW_ORIENTED:
837     a->roworiented = flg;
838     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
839     break;
840   case MAT_NEW_DIAGONALS:
841   case MAT_USE_HASH_TABLE:
842     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
843     break;
844   case MAT_IGNORE_OFF_PROC_ENTRIES:
845     a->donotstash = flg;
846     break;
847   case MAT_SYMMETRIC:
848   case MAT_STRUCTURALLY_SYMMETRIC:
849   case MAT_HERMITIAN:
850   case MAT_SYMMETRY_ETERNAL:
851   case MAT_IGNORE_LOWER_TRIANGULAR:
852     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
853     break;
854   default:
855     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
856   }
857   PetscFunctionReturn(0);
858 }
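
/*
   Usage sketch (illustration only): two of the options handled above, as they are
   typically used on a matrix A.  MAT_ROW_ORIENTED set to PETSC_FALSE makes MatSetValues()
   interpret the value array in column-major order; MAT_IGNORE_OFF_PROC_ENTRIES silently
   drops entries destined for another process instead of stashing them.

     ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/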
859 
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatDiagonalScale_MPIDense"
863 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
864 {
865   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
866   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
867   PetscScalar    *l,*r,x,*v;
868   PetscErrorCode ierr;
869   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
870 
871   PetscFunctionBegin;
872   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
873   if (ll) {
874     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
875     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
876     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
877     for (i=0; i<m; i++) {
878       x = l[i];
879       v = mat->v + i;
880       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
881     }
882     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
883     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
884   }
885   if (rr) {
886     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
887     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
888     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
891     for (i=0; i<n; i++) {
892       x = r[i];
893       v = mat->v + i*m;
894       for (j=0; j<m; j++) { (*v++) *= x;}
895     }
896     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
897     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
898   }
899   PetscFunctionReturn(0);
900 }
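
/*
   Usage sketch (illustration only): the routine above computes A <- diag(ll) * A * diag(rr).
   The left vector must match the row layout of A and the right vector its column layout;
   conforming vectors can be obtained from MatGetVecs().

     Vec l,r;

     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(l);CHKERRQ(ierr);
     ierr = VecDestroy(r);CHKERRQ(ierr);
*/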
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatNorm_MPIDense"
904 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
905 {
906   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
907   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
908   PetscErrorCode ierr;
909   PetscInt       i,j;
910   PetscReal      sum = 0.0;
911   PetscScalar    *v = mat->v;
912 
913   PetscFunctionBegin;
914   if (mdn->size == 1) {
915     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
916   } else {
917     if (type == NORM_FROBENIUS) {
918       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
919 #if defined(PETSC_USE_COMPLEX)
920         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
921 #else
922         sum += (*v)*(*v); v++;
923 #endif
924       }
925       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
926       *nrm = sqrt(*nrm);
927       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
928     } else if (type == NORM_1) {
929       PetscReal *tmp,*tmp2;
930       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
931       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
932       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
933       *nrm = 0.0;
934       v = mat->v;
935       for (j=0; j<mdn->A->cmap->n; j++) {
936         for (i=0; i<mdn->A->rmap->n; i++) {
937           tmp[j] += PetscAbsScalar(*v);  v++;
938         }
939       }
940       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
941       for (j=0; j<A->cmap->N; j++) {
942         if (tmp2[j] > *nrm) *nrm = tmp2[j];
943       }
944       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
945       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
946     } else if (type == NORM_INFINITY) { /* max row norm */
947       PetscReal ntemp;
948       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
949       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
950     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
951   }
952   PetscFunctionReturn(0);
953 }
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "MatTranspose_MPIDense"
957 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
958 {
959   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
960   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
961   Mat            B;
962   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
963   PetscErrorCode ierr;
964   PetscInt       j,i;
965   PetscScalar    *v;
966 
967   PetscFunctionBegin;
968   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
969   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
970     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
971     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
972     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
973     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
974   } else {
975     B = *matout;
976   }
977 
978   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
979   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
980   for (i=0; i<m; i++) rwork[i] = rstart + i;
981   for (j=0; j<n; j++) {
982     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
983     v   += m;
984   }
985   ierr = PetscFree(rwork);CHKERRQ(ierr);
986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
988   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
989     *matout = B;
990   } else {
991     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
992   }
993   PetscFunctionReturn(0);
994 }
995 
996 
997 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
998 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1002 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1003 {
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #if defined(PETSC_HAVE_PLAPACK)
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1015 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1016 {
1017   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1018   PetscErrorCode ierr;
1019   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1020   PetscScalar    *array;
1021   PetscReal      one = 1.0;
1022 
1023   PetscFunctionBegin;
1024   /* Copy A into F->lu->A */
1025   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1026   ierr = PLA_API_begin();CHKERRQ(ierr);
1027   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1028   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1029   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1030   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1031   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1032   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1033   ierr = PLA_API_end();CHKERRQ(ierr);
1034   lu->rstart = rstart;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1040 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1041 {
1042   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1043   PetscErrorCode ierr;
1044   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1045   PetscScalar    *array;
1046   PetscReal      one = 1.0;
1047 
1048   PetscFunctionBegin;
1049   /* Copy the PLAPACK object attached to F (lu->A) into the PETSc matrix A */
1050   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1051   ierr = PLA_API_begin();CHKERRQ(ierr);
1052   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1053   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1054   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1055   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1056   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1057   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1058   ierr = PLA_API_end();CHKERRQ(ierr);
1059   lu->rstart = rstart;
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1065 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1066 {
1067   PetscErrorCode ierr;
1068   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1069   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1070   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1071   PLA_Obj        alpha = NULL,beta = NULL;
1072 
1073   PetscFunctionBegin;
1074   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1075   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1076 
1077   /*
1078   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1079   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1080   */
1081 
1082   /* do the multiply in PLA  */
1083   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1084   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1085   CHKMEMQ;
1086 
1087   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1088   CHKMEMQ;
1089   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1090   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1091 
1092   /*
1093   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1094   */
1095   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1101 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1102 {
1103   PetscErrorCode ierr;
1104   PetscInt       m=A->rmap->n,n=B->cmap->n;
1105   Mat            Cmat;
1106 
1107   PetscFunctionBegin;
1108   if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1109   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported");
1110   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1111   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1112   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1113   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1114   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1115 
1116   *C = Cmat;
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1122 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1123 {
1124   PetscErrorCode ierr;
1125 
1126   PetscFunctionBegin;
1127   if (scall == MAT_INITIAL_MATRIX){
1128     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1129   }
1130   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatSolve_MPIDense"
1136 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1137 {
1138   MPI_Comm       comm = ((PetscObject)A)->comm;
1139   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1140   PetscErrorCode ierr;
1141   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1142   PetscScalar    *array;
1143   PetscReal      one = 1.0;
1144   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1145   PLA_Obj        v_pla = NULL;
1146   PetscScalar    *loc_buf;
1147   Vec            loc_x;
1148 
1149   PetscFunctionBegin;
1150   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1151   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1152 
1153   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1154   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1155   PLA_Obj_set_to_zero(v_pla);
1156 
1157   /* Copy b into rhs_pla */
1158   PLA_API_begin();
1159   PLA_Obj_API_open(v_pla);
1160   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1161   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1162   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1163   PLA_Obj_API_close(v_pla);
1164   PLA_API_end();
1165 
1166   if (A->factortype == MAT_FACTOR_LU){
1167     /* Apply the permutations to the right hand sides */
1168     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1169 
1170     /* Solve L y = b, overwriting b with y */
1171     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1172 
1173     /* Solve U x = y (=b), overwriting b with x */
1174     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1175   } else { /* MAT_FACTOR_CHOLESKY */
1176     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1177     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1178                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1179   }
1180 
1181   /* Copy PLAPACK x into Petsc vector x  */
1182   PLA_Obj_local_length(v_pla, &loc_m);
1183   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1184   PLA_Obj_local_stride(v_pla, &loc_stride);
1185   /*
1186     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1187   */
1188   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1189   if (!lu->pla_solved){
1190 
1191     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1192     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1193 
1194     /* Create index sets and the scatter context for the VecScatter */
1195     PLA_Obj_local_length(v_pla, &loc_m);
1196     PLA_Obj_local_stride(v_pla, &loc_stride);
1197     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1198 
1199     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1200     for (i=0; i<loc_m; i+=lu->nb){
1201       j = 0;
1202       while (j < lu->nb && i+j < loc_m){
1203         idx_petsc[i+j] = rstart + j; j++;
1204       }
1205       rstart += size*lu->nb;
1206     }
1207 
1208     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1209 
1210     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1211     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1212     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1213     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1214   }
1215   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217 
1218   /* Free data */
1219   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1220   PLA_Obj_free(&v_pla);
1221 
1222   lu->pla_solved = PETSC_TRUE;
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1228 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1229 {
1230   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1231   PetscErrorCode ierr;
1232   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1233   PetscInt       info_pla=0;
1234   PetscScalar    *array,one = 1.0;
1235 
1236   PetscFunctionBegin;
1237   if (lu->mstruct == SAME_NONZERO_PATTERN){
1238     PLA_Obj_free(&lu->A);
1239     PLA_Obj_free (&lu->pivots);
1240   }
1241   /* Create PLAPACK matrix object */
1242   lu->A = NULL; lu->pivots = NULL;
1243   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1244   PLA_Obj_set_to_zero(lu->A);
1245   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1246 
1247   /* Copy A into lu->A */
1248   PLA_API_begin();
1249   PLA_Obj_API_open(lu->A);
1250   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1251   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1252   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1253   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1254   PLA_Obj_API_close(lu->A);
1255   PLA_API_end();
1256 
1257   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1258   info_pla = PLA_LU(lu->A,lu->pivots);
1259   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1260 
1261   lu->rstart         = rstart;
1262   lu->mstruct        = SAME_NONZERO_PATTERN;
1263   F->ops->solve      = MatSolve_MPIDense;
1264   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1270 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1271 {
1272   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1273   PetscErrorCode ierr;
1274   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1275   PetscInt       info_pla=0;
1276   PetscScalar    *array,one = 1.0;
1277 
1278   PetscFunctionBegin;
1279   if (lu->mstruct == SAME_NONZERO_PATTERN){
1280     PLA_Obj_free(&lu->A);
1281   }
1282   /* Create PLAPACK matrix object */
1283   lu->A      = NULL;
1284   lu->pivots = NULL;
1285   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1286 
1287   /* Copy A into lu->A */
1288   PLA_API_begin();
1289   PLA_Obj_API_open(lu->A);
1290   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1291   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1292   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1293   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1294   PLA_Obj_API_close(lu->A);
1295   PLA_API_end();
1296 
1297   /* Cholesky factor A, overwriting the lower triangular portion of A with the factor L */
1298   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1299   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1300 
1301   lu->rstart         = rstart;
1302   lu->mstruct        = SAME_NONZERO_PATTERN;
1303   F->ops->solve      = MatSolve_MPIDense;
1304   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 /* Note the Petsc perm permutation is ignored */
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1311 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1312 {
1313   PetscErrorCode ierr;
1314   PetscTruth     issymmetric,set;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1318   if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1319   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 /* Note the Petsc r and c permutations are ignored */
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1326 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1327 {
1328   PetscErrorCode ierr;
1329   PetscInt       M = A->rmap->N;
1330   Mat_Plapack    *lu;
1331 
1332   PetscFunctionBegin;
1333   lu = (Mat_Plapack*)F->spptr;
1334   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1335   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 EXTERN_C_BEGIN
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1342 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1343 {
1344   PetscFunctionBegin;
1345   *type = MATSOLVERPLAPACK;
1346   PetscFunctionReturn(0);
1347 }
1348 EXTERN_C_END
1349 
1350 EXTERN_C_BEGIN
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1353 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1354 {
1355   PetscErrorCode ierr;
1356   Mat_Plapack    *lu;
1357   PetscMPIInt    size;
1358   PetscInt       M=A->rmap->N;
1359 
1360   PetscFunctionBegin;
1361   /* Create the factorization matrix */
1362   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1363   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1364   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1365   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1366   (*F)->spptr = (void*)lu;
1367 
1368   /* Set default Plapack parameters */
1369   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1370   lu->nb = M/size;
1371   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1372 
1373   /* Set runtime options */
1374   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1375     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1376   PetscOptionsEnd();
1377 
1378   /* Create object distribution template */
1379   lu->templ = NULL;
1380   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1381 
1382   /* Set the datatype */
1383 #if defined(PETSC_USE_COMPLEX)
1384   lu->datatype = MPI_DOUBLE_COMPLEX;
1385 #else
1386   lu->datatype = MPI_DOUBLE;
1387 #endif
1388 
1389   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1390 
1391 
1392   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1393   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1394 
1395   if (ftype == MAT_FACTOR_LU) {
1396     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1397   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1398     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1399   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1400   (*F)->factortype = ftype;
1401   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 EXTERN_C_END
1405 #endif
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1409 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1410 {
1411 #if defined(PETSC_HAVE_PLAPACK)
1412   PetscErrorCode ierr;
1413 #endif
1414 
1415   PetscFunctionBegin;
1416 #if defined(PETSC_HAVE_PLAPACK)
1417   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1418 #else
1419   SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1420 #endif
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatAXPY_MPIDense"
1426 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1427 {
1428   PetscErrorCode ierr;
1429   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatConjugate_MPIDense"
1438 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat)
1439 {
1440   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1441   PetscErrorCode ierr;
1442 
1443   PetscFunctionBegin;
1444   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatRealPart_MPIDense"
1450 PetscErrorCode MatRealPart_MPIDense(Mat A)
1451 {
1452   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1453   PetscErrorCode ierr;
1454 
1455   PetscFunctionBegin;
1456   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1462 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1463 {
1464   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1465   PetscErrorCode ierr;
1466 
1467   PetscFunctionBegin;
1468   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 /* -------------------------------------------------------------------*/
1473 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1474        MatGetRow_MPIDense,
1475        MatRestoreRow_MPIDense,
1476        MatMult_MPIDense,
1477 /* 4*/ MatMultAdd_MPIDense,
1478        MatMultTranspose_MPIDense,
1479        MatMultTransposeAdd_MPIDense,
1480        0,
1481        0,
1482        0,
1483 /*10*/ 0,
1484        0,
1485        0,
1486        0,
1487        MatTranspose_MPIDense,
1488 /*15*/ MatGetInfo_MPIDense,
1489        MatEqual_MPIDense,
1490        MatGetDiagonal_MPIDense,
1491        MatDiagonalScale_MPIDense,
1492        MatNorm_MPIDense,
1493 /*20*/ MatAssemblyBegin_MPIDense,
1494        MatAssemblyEnd_MPIDense,
1495        MatSetOption_MPIDense,
1496        MatZeroEntries_MPIDense,
1497 /*24*/ MatZeroRows_MPIDense,
1498        0,
1499        0,
1500        0,
1501        0,
1502 /*29*/ MatSetUpPreallocation_MPIDense,
1503        0,
1504        0,
1505        MatGetArray_MPIDense,
1506        MatRestoreArray_MPIDense,
1507 /*34*/ MatDuplicate_MPIDense,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*39*/ MatAXPY_MPIDense,
1513        MatGetSubMatrices_MPIDense,
1514        0,
1515        MatGetValues_MPIDense,
1516        0,
1517 /*44*/ 0,
1518        MatScale_MPIDense,
1519        0,
1520        0,
1521        0,
1522 /*49*/ 0,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*54*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*59*/ MatGetSubMatrix_MPIDense,
1533        MatDestroy_MPIDense,
1534        MatView_MPIDense,
1535        0,
1536        0,
1537 /*64*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*69*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*74*/ 0,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*79*/ 0,
1553        0,
1554        0,
1555        0,
1556 /*83*/ 0,
1557        0,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*89*/
1563 #if defined(PETSC_HAVE_PLAPACK)
1564        MatMatMult_MPIDense_MPIDense,
1565        MatMatMultSymbolic_MPIDense_MPIDense,
1566        MatMatMultNumeric_MPIDense_MPIDense,
1567 #else
1568        0,
1569        0,
1570        0,
1571 #endif
1572        0,
1573        0,
1574 /*94*/ 0,
1575        0,
1576        0,
1577        0,
1578        0,
1579 /*99*/ 0,
1580        0,
1581        0,
1582        MatConjugate_MPIDense,
1583        0,
1584 /*104*/0,
1585        MatRealPart_MPIDense,
1586        MatImaginaryPart_MPIDense,
1587        0,
1588        0,
1589 /*109*/0,
1590        0,
1591        0,
1592        0,
1593        0,
1594 /*114*/0,
1595        0,
1596        0,
1597        0,
1598        0,
1599 /*119*/0,
1600        0,
1601        0,
1602        0,
1603        MatLoadnew_MPIDense
1604 };
1605 
1606 EXTERN_C_BEGIN
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1609 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1610 {
1611   Mat_MPIDense   *a;
1612   PetscErrorCode ierr;
1613 
1614   PetscFunctionBegin;
1615   mat->preallocated = PETSC_TRUE;
1616   /* Note: for now, when data is provided by the caller, this assumes the user has correctly
1617    allocated the local dense storage space.  We should add error checking. */
1618 
1619   a    = (Mat_MPIDense*)mat->data;
1620   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1621   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1622   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1623   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1624   a->nvec = mat->cmap->n;
1625 
1626   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1627   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1628   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1629   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1630   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1631   PetscFunctionReturn(0);
1632 }
1633 EXTERN_C_END
1634 
1635 /*MC
1636    MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1637 
1638   run ./configure with the option --download-plapack
1639 
1640 
1641   Options Database Keys:
1642 . -mat_plapack_nprows <n> - number of rows in processor partition
1643 . -mat_plapack_npcols <n> - number of columns in processor partition
1644 . -mat_plapack_nb <n> - block size of template vector
1645 . -mat_plapack_nb_alg <n> - algorithmic block size
1646 - -mat_plapack_ckerror <n> - error checking flag
1647 
1648    Level: intermediate
1649 
1650 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1651 
1652 M*/
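/*
    Example usage of MATSOLVERPLAPACK (a minimal sketch, not taken from the PETSc test suite; the
    assembled parallel dense matrix A and the vectors b,x are assumed to already exist, and ksp,pc
    are illustrative names):

      KSP ksp;
      PC  pc;

      ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
      ierr = KSPSetOperators(ksp,A,A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
      ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
      ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
      ierr = PCFactorSetMatSolverPackage(pc,MATSOLVERPLAPACK);CHKERRQ(ierr);
      ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
      ierr = KSPDestroy(ksp);CHKERRQ(ierr);

    or, equivalently, call KSPSetFromOptions(ksp) and select the package at run time with
      -ksp_type preonly -pc_type lu -pc_factor_mat_solver_package plapack
*/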
1653 
1654 EXTERN_C_BEGIN
1655 #undef __FUNCT__
1656 #define __FUNCT__ "MatCreate_MPIDense"
1657 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1658 {
1659   Mat_MPIDense   *a;
1660   PetscErrorCode ierr;
1661 
1662   PetscFunctionBegin;
1663   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1664   mat->data         = (void*)a;
1665   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1666   mat->mapping      = 0;
1667 
1668   mat->insertmode = NOT_SET_VALUES;
1669   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1670   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1671 
1672   /* build cache for off array entries formed */
1673   a->donotstash = PETSC_FALSE;
1674   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1675 
1676   /* stuff used for matrix vector multiply */
1677   a->lvec        = 0;
1678   a->Mvctx       = 0;
1679   a->roworiented = PETSC_TRUE;
1680 
1681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1682                                      "MatGetDiagonalBlock_MPIDense",
1683                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1684   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1685                                      "MatMPIDenseSetPreallocation_MPIDense",
1686                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1687   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1688                                      "MatMatMult_MPIAIJ_MPIDense",
1689                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1690   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1691                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1692                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1693   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1694                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1695                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1697                                      "MatGetFactor_mpidense_petsc",
1698                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1699 #if defined(PETSC_HAVE_PLAPACK)
1700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1701                                      "MatGetFactor_mpidense_plapack",
1702                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1703   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1704 #endif
1705   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1706 
1707   PetscFunctionReturn(0);
1708 }
1709 EXTERN_C_END
1710 
1711 /*MC
1712    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1713 
1714    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1715    and MATMPIDENSE otherwise.
1716 
1717    Options Database Keys:
1718 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1719 
1720   Level: beginner
1721 
1722 
1723 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1724 M*/
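/*
    Example usage of MATDENSE (a sketch; comm, M and N are assumed to be defined by the caller).
    MatSetType() picks MATSEQDENSE on one process and MATMPIDENSE otherwise; the preallocation call
    below is a no-op when the type ends up MATSEQDENSE.

      Mat A;

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

    followed by MatSetValues() / MatAssemblyBegin() / MatAssemblyEnd() to fill the matrix.
*/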
1725 
1726 EXTERN_C_BEGIN
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCreate_Dense"
1729 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1730 {
1731   PetscErrorCode ierr;
1732   PetscMPIInt    size;
1733 
1734   PetscFunctionBegin;
1735   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1736   if (size == 1) {
1737     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1738   } else {
1739     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1740   }
1741   PetscFunctionReturn(0);
1742 }
1743 EXTERN_C_END
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1747 /*@C
1748    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1749 
1750    Not collective
1751 
1752    Input Parameters:
1753 +  A - the matrix
1754 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1755    to control all matrix memory allocation.
1756 
1757    Notes:
1758    The dense format is fully compatible with standard Fortran 77
1759    storage by columns.
1760 
1761    The data input variable is intended primarily for Fortran programmers
1762    who wish to allocate their own matrix memory space.  Most users should
1763    set data=PETSC_NULL.
1764 
1765    Level: intermediate
1766 
1767 .keywords: matrix,dense, parallel
1768 
1769 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1770 @*/
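/*
    Example usage (a sketch; comm, the local sizes m,n and the global sizes M,N are chosen by the
    caller, and "mydata" is an illustrative name for a user-provided column-major array that must
    hold the local rows of ALL N global columns and remain valid for the life of the matrix):

      Mat         A;
      PetscScalar *mydata;

      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);

    Passing data=PETSC_NULL instead lets PETSc allocate the storage itself.
*/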
1771 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1772 {
1773   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1774 
1775   PetscFunctionBegin;
1776   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1777   if (f) {
1778     ierr = (*f)(mat,data);CHKERRQ(ierr);
1779   }
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatCreateMPIDense"
1785 /*@C
1786    MatCreateMPIDense - Creates a parallel matrix in dense format.
1787 
1788    Collective on MPI_Comm
1789 
1790    Input Parameters:
1791 +  comm - MPI communicator
1792 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1793 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1794 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1795 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1796 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1797    to control all matrix memory allocation.
1798 
1799    Output Parameter:
1800 .  A - the matrix
1801 
1802    Notes:
1803    The dense format is fully compatible with standard Fortran 77
1804    storage by columns.
1805 
1806    The data input variable is intended primarily for Fortran programmers
1807    who wish to allocate their own matrix memory space.  Most users should
1808    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1809 
1810    The user MUST specify either the local or global matrix dimensions
1811    (possibly both).
1812 
1813    Level: intermediate
1814 
1815 .keywords: matrix,dense, parallel
1816 
1817 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1818 @*/
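/*
    Example usage (a minimal sketch; PETSc allocates the storage and chooses the local sizes, and
    the global sizes M,N as well as the values inserted are purely illustrative):

      Mat         A;
      PetscInt    i,j,rstart,rend;
      PetscScalar v;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        for (j=0; j<N; j++) {
          v    = 1.0/(i+j+1);
          ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/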
1819 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1820 {
1821   PetscErrorCode ierr;
1822   PetscMPIInt    size;
1823 
1824   PetscFunctionBegin;
1825   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1826   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1827   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1828   if (size > 1) {
1829     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1830     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1831   } else {
1832     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1833     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1834   }
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatDuplicate_MPIDense"
1840 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1841 {
1842   Mat            mat;
1843   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1844   PetscErrorCode ierr;
1845 
1846   PetscFunctionBegin;
1847   *newmat       = 0;
1848   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1849   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1850   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1851   a                 = (Mat_MPIDense*)mat->data;
1852   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1853   mat->factortype   = A->factortype;
1854   mat->assembled    = PETSC_TRUE;
1855   mat->preallocated = PETSC_TRUE;
1856 
1857   mat->rmap->rstart     = A->rmap->rstart;
1858   mat->rmap->rend       = A->rmap->rend;
1859   a->size              = oldmat->size;
1860   a->rank              = oldmat->rank;
1861   mat->insertmode      = NOT_SET_VALUES;
1862   a->nvec              = oldmat->nvec;
1863   a->donotstash        = oldmat->donotstash;
1864 
1865   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1866   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1867 
1868   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1869   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1870   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1871 
1872   *newmat = mat;
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1878 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1879 {
1880   PetscErrorCode ierr;
1881   PetscMPIInt    rank,size;
1882   PetscInt       *rowners,i,m,nz,j;
1883   PetscScalar    *array,*vals,*vals_ptr;
1884   MPI_Status     status;
1885 
1886   PetscFunctionBegin;
1887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1889 
1890   /* determine ownership of all rows */
1891   m          = M/size + ((M % size) > rank);
1892   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1893   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1894   rowners[0] = 0;
1895   for (i=2; i<=size; i++) {
1896     rowners[i] += rowners[i-1];
1897   }
1898 
1899   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1900   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1901   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1902   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1903   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1904 
1905   if (!rank) {
1906     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1907 
1908     /* read in my part of the matrix numerical values  */
1909     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1910 
1911     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1912     vals_ptr = vals;
1913     for (i=0; i<m; i++) {
1914       for (j=0; j<N; j++) {
1915         array[i + j*m] = *vals_ptr++;
1916       }
1917     }
1918 
1919     /* read in the other processors' parts and ship them out */
1920     for (i=1; i<size; i++) {
1921       nz   = (rowners[i+1] - rowners[i])*N;
1922       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1923       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1924     }
1925   } else {
1926     /* receive numeric values */
1927     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1928 
1929     /* receive message of values*/
1930     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1931 
1932     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1933     vals_ptr = vals;
1934     for (i=0; i<m; i++) {
1935       for (j=0; j<N; j++) {
1936         array[i + j*m] = *vals_ptr++;
1937       }
1938     }
1939   }
1940   ierr = PetscFree(rowners);CHKERRQ(ierr);
1941   ierr = PetscFree(vals);CHKERRQ(ierr);
1942   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1943   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatLoad_MPIDense"
1949 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1950 {
1951   Mat            A;
1952   PetscScalar    *vals,*svals;
1953   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1954   MPI_Status     status;
1955   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1956   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1957   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1958   PetscInt       i,nz,j,rstart,rend;
1959   int            fd;
1960   PetscErrorCode ierr;
1961 
1962   PetscFunctionBegin;
1963   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1964   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1965   if (!rank) {
1966     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1967     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1968     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1969   }
1970 
1971   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1972   M = header[1]; N = header[2]; nz = header[3];
1973 
1974   /*
1975        Handle case where matrix is stored on disk as a dense matrix
1976   */
1977   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1978     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1979     PetscFunctionReturn(0);
1980   }
1981 
1982   /* determine ownership of all rows */
1983   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1984   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1985   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1986   rowners[0] = 0;
1987   for (i=2; i<=size; i++) {
1988     rowners[i] += rowners[i-1];
1989   }
1990   rstart = rowners[rank];
1991   rend   = rowners[rank+1];
1992 
1993   /* distribute row lengths to all processors */
1994   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1995   if (!rank) {
1996     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1997     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1998     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1999     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2000     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2001     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2002   } else {
2003     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2004   }
2005 
2006   if (!rank) {
2007     /* calculate the number of nonzeros on each processor */
2008     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2009     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2010     for (i=0; i<size; i++) {
2011       for (j=rowners[i]; j< rowners[i+1]; j++) {
2012         procsnz[i] += rowlengths[j];
2013       }
2014     }
2015     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2016 
2017     /* determine max buffer needed and allocate it */
2018     maxnz = 0;
2019     for (i=0; i<size; i++) {
2020       maxnz = PetscMax(maxnz,procsnz[i]);
2021     }
2022     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2023 
2024     /* read in my part of the matrix column indices  */
2025     nz = procsnz[0];
2026     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2027     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2028 
2029     /* read in everyone else's part and ship it off */
2030     for (i=1; i<size; i++) {
2031       nz   = procsnz[i];
2032       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2033       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2034     }
2035     ierr = PetscFree(cols);CHKERRQ(ierr);
2036   } else {
2037     /* determine buffer space needed for message */
2038     nz = 0;
2039     for (i=0; i<m; i++) {
2040       nz += ourlens[i];
2041     }
2042     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2043 
2044     /* receive message of column indices*/
2045     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2046     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2047     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2048   }
2049 
2050   /* loop over local rows, determining number of off diagonal entries */
2051   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2052   jj = 0;
2053   for (i=0; i<m; i++) {
2054     for (j=0; j<ourlens[i]; j++) {
2055       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2056       jj++;
2057     }
2058   }
2059 
2060   /* create our matrix */
2061   for (i=0; i<m; i++) {
2062     ourlens[i] -= offlens[i];
2063   }
2064   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2065   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2066   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2067   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2068   A = *newmat;
2069   for (i=0; i<m; i++) {
2070     ourlens[i] += offlens[i];
2071   }
2072 
2073   if (!rank) {
2074     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2075 
2076     /* read in my part of the matrix numerical values  */
2077     nz = procsnz[0];
2078     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2079 
2080     /* insert into matrix */
2081     jj      = rstart;
2082     smycols = mycols;
2083     svals   = vals;
2084     for (i=0; i<m; i++) {
2085       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2086       smycols += ourlens[i];
2087       svals   += ourlens[i];
2088       jj++;
2089     }
2090 
2091     /* read in the other processors' parts and ship them out */
2092     for (i=1; i<size; i++) {
2093       nz   = procsnz[i];
2094       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2095       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2096     }
2097     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2098   } else {
2099     /* receive numeric values */
2100     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2101 
2102     /* receive message of values*/
2103     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2104     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2105     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2106 
2107     /* insert into matrix */
2108     jj      = rstart;
2109     smycols = mycols;
2110     svals   = vals;
2111     for (i=0; i<m; i++) {
2112       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2113       smycols += ourlens[i];
2114       svals   += ourlens[i];
2115       jj++;
2116     }
2117   }
2118   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2119   ierr = PetscFree(vals);CHKERRQ(ierr);
2120   ierr = PetscFree(mycols);CHKERRQ(ierr);
2121   ierr = PetscFree(rowners);CHKERRQ(ierr);
2122 
2123   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2124   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatLoadnew_MPIDense_DenseInFile"
2130 PetscErrorCode MatLoadnew_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
2131 {
2132   PetscErrorCode ierr;
2133   PetscMPIInt    rank,size;
2134   PetscInt       *rowners,i,m,nz,j;
2135   PetscScalar    *array,*vals,*vals_ptr;
2136   MPI_Status     status;
2137 
2138   PetscFunctionBegin;
2139   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2141 
2142   /* determine ownership of all rows */
2143   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
2144   else                     m = newmat->rmap->n;
2145   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2146   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2147   rowners[0] = 0;
2148   for (i=2; i<=size; i++) {
2149     rowners[i] += rowners[i-1];
2150   }
2151 
2152   if (!sizesset) {
2153     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2154   }
2155   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2156   ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);
2157 
2158   if (!rank) {
2159     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2160 
2161     /* read in my part of the matrix numerical values  */
2162     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
2163 
2164     /* insert into the matrix by row (this is why we cannot read directly into the array) */
2165     vals_ptr = vals;
2166     for (i=0; i<m; i++) {
2167       for (j=0; j<N; j++) {
2168         array[i + j*m] = *vals_ptr++;
2169       }
2170     }
2171 
2172     /* read in the other processors' parts and ship them out */
2173     for (i=1; i<size; i++) {
2174       nz   = (rowners[i+1] - rowners[i])*N;
2175       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2176       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
2177     }
2178   } else {
2179     /* receive numeric values */
2180     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2181 
2182     /* receive message of values*/
2183     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);
2184 
2185     /* insert into the matrix by row (this is why we cannot read directly into the array) */
2186     vals_ptr = vals;
2187     for (i=0; i<m; i++) {
2188       for (j=0; j<N; j++) {
2189         array[i + j*m] = *vals_ptr++;
2190       }
2191     }
2192   }
2193   ierr = PetscFree(rowners);CHKERRQ(ierr);
2194   ierr = PetscFree(vals);CHKERRQ(ierr);
2195   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2196   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatLoadnew_MPIDense"
2202 PetscErrorCode MatLoadnew_MPIDense(PetscViewer viewer,Mat newmat)
2203 {
2204   PetscScalar    *vals,*svals;
2205   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2206   MPI_Status     status;
2207   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
2208   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
2209   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
2210   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
2211   int            fd;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2216   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2217   if (!rank) {
2218     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2219     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2220     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2221   }
2222 
2223   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2224 
2225   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2226   M = header[1]; N = header[2]; nz = header[3];
2227 
2228   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2229   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2230   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2231 
2232   /* If global sizes are set, check if they are consistent with that given in the file */
2233   if (sizesset) {
2234     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2235   }
2236   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
2237   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2238 
2239   /*
2240        Handle case where matrix is stored on disk as a dense matrix
2241   */
2242   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
2243     ierr = MatLoadnew_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
2244     PetscFunctionReturn(0);
2245   }
2246 
2247   /* determine ownership of all rows */
2248   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
2249   else                     m = PetscMPIIntCast(newmat->rmap->n);
2250   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2251   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2252   rowners[0] = 0;
2253   for (i=2; i<=size; i++) {
2254     rowners[i] += rowners[i-1];
2255   }
2256   rstart = rowners[rank];
2257   rend   = rowners[rank+1];
2258 
2259   /* distribute row lengths to all processors */
2260   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
2261   if (!rank) {
2262     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2263     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2264     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2265     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2266     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2267     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2268   } else {
2269     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2270   }
2271 
2272   if (!rank) {
2273     /* calculate the number of nonzeros on each processor */
2274     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2275     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2276     for (i=0; i<size; i++) {
2277       for (j=rowners[i]; j< rowners[i+1]; j++) {
2278         procsnz[i] += rowlengths[j];
2279       }
2280     }
2281     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2282 
2283     /* determine max buffer needed and allocate it */
2284     maxnz = 0;
2285     for (i=0; i<size; i++) {
2286       maxnz = PetscMax(maxnz,procsnz[i]);
2287     }
2288     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2289 
2290     /* read in my part of the matrix column indices  */
2291     nz = procsnz[0];
2292     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2293     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2294 
2295     /* read in everyone else's part and ship it off */
2296     for (i=1; i<size; i++) {
2297       nz   = procsnz[i];
2298       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2299       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2300     }
2301     ierr = PetscFree(cols);CHKERRQ(ierr);
2302   } else {
2303     /* determine buffer space needed for message */
2304     nz = 0;
2305     for (i=0; i<m; i++) {
2306       nz += ourlens[i];
2307     }
2308     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2309 
2310     /* receive message of column indices*/
2311     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2312     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2313     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2314   }
2315 
2316   /* loop over local rows, determining number of off diagonal entries */
2317   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2318   jj = 0;
2319   for (i=0; i<m; i++) {
2320     for (j=0; j<ourlens[i]; j++) {
2321       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2322       jj++;
2323     }
2324   }
2325 
2326   /* create our matrix */
2327   for (i=0; i<m; i++) {
2328     ourlens[i] -= offlens[i];
2329   }
2330 
2331   if (!sizesset) {
2332     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2333   }
2334   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2335   for (i=0; i<m; i++) {
2336     ourlens[i] += offlens[i];
2337   }
2338 
2339   if (!rank) {
2340     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2341 
2342     /* read in my part of the matrix numerical values  */
2343     nz = procsnz[0];
2344     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2345 
2346     /* insert into matrix */
2347     jj      = rstart;
2348     smycols = mycols;
2349     svals   = vals;
2350     for (i=0; i<m; i++) {
2351       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2352       smycols += ourlens[i];
2353       svals   += ourlens[i];
2354       jj++;
2355     }
2356 
2357     /* read in the other processors' parts and ship them out */
2358     for (i=1; i<size; i++) {
2359       nz   = procsnz[i];
2360       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2361       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2362     }
2363     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2364   } else {
2365     /* receive numeric values */
2366     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2367 
2368     /* receive message of values*/
2369     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2370     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2371     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2372 
2373     /* insert into matrix */
2374     jj      = rstart;
2375     smycols = mycols;
2376     svals   = vals;
2377     for (i=0; i<m; i++) {
2378       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2379       smycols += ourlens[i];
2380       svals   += ourlens[i];
2381       jj++;
2382     }
2383   }
2384   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2385   ierr = PetscFree(vals);CHKERRQ(ierr);
2386   ierr = PetscFree(mycols);CHKERRQ(ierr);
2387   ierr = PetscFree(rowners);CHKERRQ(ierr);
2388 
2389   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatEqual_MPIDense"
2396 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2397 {
2398   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2399   Mat            a,b;
2400   PetscTruth     flg;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   a = matA->A;
2405   b = matB->A;
2406   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2407   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 #if defined(PETSC_HAVE_PLAPACK)
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2415 /*@C
2416   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2417   Level: developer
2418 
2419 .keywords: Petsc, destroy, package, PLAPACK
2420 .seealso: PetscFinalize()
2421 @*/
2422 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2423 {
2424   PetscErrorCode ierr;
2425 
2426   PetscFunctionBegin;
2427   ierr = PLA_Finalize();CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2433 /*@C
2434   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2435   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2436 
2437   Input Parameter:
2438 .   comm - the communicator the matrix lives on
2439 
2440   Level: developer
2441 
2442    Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
2443   same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2444   the matrices that trigger the PLAPACK initialization are created on subcommunicators of MPI_COMM_WORLD, those
2445   subcommunicators cannot overlap.
2446 
2447 .keywords: Petsc, initialize, package, PLAPACK
2448 .seealso: PetscSysInitializePackage(), PetscInitialize()
2449 @*/
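/*
    The 2D processor mesh and error checking described above are normally selected at run time,
    e.g. (a sketch; "ex1" is an illustrative executable name, run on 4 processes as a 2x2 mesh):

      mpiexec -n 4 ./ex1 -mat_plapack_nprows 2 -mat_plapack_npcols 2 -mat_plapack_ckerror 0
*/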
2450 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2451 {
2452   PetscMPIInt    size;
2453   PetscErrorCode ierr;
2454 
2455   PetscFunctionBegin;
2456   if (!PLA_Initialized(PETSC_NULL)) {
2457 
2458     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2459     Plapack_nprows = 1;
2460     Plapack_npcols = size;
2461 
2462     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2463       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2464       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2465 #if defined(PETSC_USE_DEBUG)
2466       Plapack_ierror = 3;
2467 #else
2468       Plapack_ierror = 0;
2469 #endif
2470       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2471       if (Plapack_ierror){
2472 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2473       } else {
2474 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2475       }
2476 
2477       Plapack_nb_alg = 0;
2478       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2479       if (Plapack_nb_alg) {
2480         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2481       }
2482     PetscOptionsEnd();
2483 
2484     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2485     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2486     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2487   }
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #endif
2492