xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 9afaeae2f87bb1e981c47bb9f31bd363e6e7d4da)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
19               stores the locally owned rows; for a MATSEQDENSE matrix it returns the matrix itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
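
/*
    A minimal usage sketch (kept out of the build with #if 0): obtain the local
    sequential matrix of an assembled MATMPIDENSE (or MATSEQDENSE) matrix A and
    access its column-major array directly. The function name is illustrative only;
    the calls are the standard PETSc routines used elsewhere in this file.
*/
#if 0
PetscErrorCode ExampleUseLocalDenseBlock(Mat A)
{
  Mat            Aloc;
  PetscScalar    *v;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr); /* local rows by all columns */
  ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);            /* column-major local storage */
  /* ... read or modify v here ... */
  ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif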
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap.N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
190   PetscScalar    *av,*bv,*v = lmat->v;
191   Mat            newmat;
192 
193   PetscFunctionBegin;
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198 
199   /* No parallel redistribution is currently supported! Each index set should really be checked
200      to confirm that it is OK. Currently only submatrices with the same row partitioning as the
201      original matrix are supported! */
202 
203   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
204   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
205 
206   /* Check submatrix call */
207   if (scall == MAT_REUSE_MATRIX) {
208     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
209     /* Really need to test rows and column sizes! */
210     newmat = *B;
211   } else {
212     /* Create and fill new matrix */
213     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
214     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
215     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
216     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
217   }
218 
219   /* Now extract the data pointers and do the copy, column at a time */
220   newmatd = (Mat_MPIDense*)newmat->data;
221   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
222 
223   for (i=0; i<ncols; i++) {
224     av = v + nlrows*icol[i];
225     for (j=0; j<nrows; j++) {
226       *bv++ = av[irow[j] - rstart];
227     }
228   }
229 
230   /* Assemble the matrices so that the correct flags are set */
231   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233 
234   /* Free work space */
235   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
236   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
237   *B = newmat;
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatRestoreArray_MPIDense"
243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
244 {
245   PetscFunctionBegin;
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
252 {
253   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
254   MPI_Comm       comm = ((PetscObject)mat)->comm;
255   PetscErrorCode ierr;
256   PetscInt       nstash,reallocs;
257   InsertMode     addv;
258 
259   PetscFunctionBegin;
260   /* make sure all processors are either in INSERTMODE or ADDMODE */
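  /* INSERT_VALUES and ADD_VALUES are distinct nonzero bits, so the bitwise OR over all
     processes equals (ADD_VALUES|INSERT_VALUES) only if some processes inserted while
     others added */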
261   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
262   if (addv == (ADD_VALUES|INSERT_VALUES)) {
263     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
264   }
265   mat->insertmode = addv; /* in case this processor had no cache */
266 
267   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
268   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
269   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
276 {
277   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
278   PetscErrorCode  ierr;
279   PetscInt        i,*row,*col,flg,j,rstart,ncols;
280   PetscMPIInt     n;
281   PetscScalar     *val;
282   InsertMode      addv=mat->insertmode;
283 
284   PetscFunctionBegin;
285   /*  wait on receives */
286   while (1) {
287     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
288     if (!flg) break;
289 
290     for (i=0; i<n;) {
291       /* Now identify the consecutive vals belonging to the same row */
292       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
293       if (j < n) ncols = j-i;
294       else       ncols = n-i;
295       /* Now assemble all these values with a single function call */
296       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
297       i = j;
298     }
299   }
300   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
301 
302   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
304 
305   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
306     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /* The code does not handle the diagonal entries correctly unless the
324    matrix is square and the column and row ownerships are identical.
325    This is a BUG. The only way to fix it seems to be to access
326    mdn->A and mdn->B directly and not through the MatZeroRows()
327    routine.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroRows_MPIDense"
331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
332 {
333   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
334   PetscErrorCode ierr;
335   PetscInt       i,*owners = A->rmap.range;
336   PetscInt       *nprocs,j,idx,nsends;
337   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
338   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
339   PetscInt       *lens,*lrows,*values;
340   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
341   MPI_Comm       comm = ((PetscObject)A)->comm;
342   MPI_Request    *send_waits,*recv_waits;
343   MPI_Status     recv_status,*send_status;
344   PetscTruth     found;
345 
346   PetscFunctionBegin;
347   /*  first count number of contributors to each processor */
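  /* nprocs[2*j] counts the rows destined for process j; nprocs[2*j+1] is a 0/1 flag
     recording whether any message is sent to process j at all */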
348   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
349   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
351   for (i=0; i<N; i++) {
352     idx = rows[i];
353     found = PETSC_FALSE;
354     for (j=0; j<size; j++) {
355       if (idx >= owners[j] && idx < owners[j+1]) {
356         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
357       }
358     }
359     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
360   }
361   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
362 
363   /* inform other processors of number of messages and max length*/
364   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
365 
366   /* post receives:   */
367   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
368   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
369   for (i=0; i<nrecvs; i++) {
370     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
371   }
372 
373   /* do sends:
374       1) starts[i] gives the starting index in svalues for the values going to
375          the ith processor
376   */
377   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
378   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
379   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
380   starts[0]  = 0;
381   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
382   for (i=0; i<N; i++) {
383     svalues[starts[owner[i]]++] = rows[i];
384   }
385 
386   starts[0] = 0;
387   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
388   count = 0;
389   for (i=0; i<size; i++) {
390     if (nprocs[2*i+1]) {
391       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
392     }
393   }
394   ierr = PetscFree(starts);CHKERRQ(ierr);
395 
396   base = owners[rank];
397 
398   /*  wait on receives */
399   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
400   source = lens + nrecvs;
401   count  = nrecvs; slen = 0;
402   while (count) {
403     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
404     /* unpack receives into our local space */
405     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
406     source[imdex]  = recv_status.MPI_SOURCE;
407     lens[imdex]    = n;
408     slen += n;
409     count--;
410   }
411   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
412 
413   /* move the data into the send scatter */
414   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
415   count = 0;
416   for (i=0; i<nrecvs; i++) {
417     values = rvalues + i*nmax;
418     for (j=0; j<lens[i]; j++) {
419       lrows[count++] = values[j] - base;
420     }
421   }
422   ierr = PetscFree(rvalues);CHKERRQ(ierr);
423   ierr = PetscFree(lens);CHKERRQ(ierr);
424   ierr = PetscFree(owner);CHKERRQ(ierr);
425   ierr = PetscFree(nprocs);CHKERRQ(ierr);
426 
427   /* actually zap the local rows */
428   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
429   ierr = PetscFree(lrows);CHKERRQ(ierr);
430 
431   /* wait on sends */
432   if (nsends) {
433     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
434     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
435     ierr = PetscFree(send_status);CHKERRQ(ierr);
436   }
437   ierr = PetscFree(send_waits);CHKERRQ(ierr);
438   ierr = PetscFree(svalues);CHKERRQ(ierr);
439 
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatMult_MPIDense"
445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
446 {
447   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
452   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatMultAdd_MPIDense"
459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatMultTranspose_MPIDense"
473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
474 {
475   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
476   PetscErrorCode ierr;
477   PetscScalar    zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(yy,zero);CHKERRQ(ierr);
481   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
482   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
483   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
492   PetscErrorCode ierr;
493 
494   PetscFunctionBegin;
495   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
496   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
497   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
498   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatGetDiagonal_MPIDense"
504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
505 {
506   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
507   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
508   PetscErrorCode ierr;
509   PetscInt       len,i,n,m = A->rmap.n,radd;
510   PetscScalar    *x,zero = 0.0;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(v,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
515   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
516   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
517   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
518   radd = A->rmap.rstart*m;
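  /* aloc->v is column major with m local rows; the diagonal entry of global row
     rstart+i sits in local column rstart+i, i.e. at offset (rstart+i)*m + i = radd + i*m + i */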
519   for (i=0; i<len; i++) {
520     x[i] = aloc->v[radd + i*m + i];
521   }
522   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatDestroy_MPIDense"
528 PetscErrorCode MatDestroy_MPIDense(Mat mat)
529 {
530   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
531   PetscErrorCode ierr;
532 #if defined(PETSC_HAVE_PLAPACK)
533   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
534 #endif
535 
536   PetscFunctionBegin;
537 
538 #if defined(PETSC_USE_LOG)
539   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
540 #endif
541   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
542   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
543   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
544   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
545 #if defined(PETSC_HAVE_PLAPACK)
546   if (lu) {
547     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
548     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
549     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
550 
551     if (lu->is_pla) {
552       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
553       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
554       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
555     }
556   }
557 #endif
558 
559   ierr = PetscFree(mdn);CHKERRQ(ierr);
560   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatView_MPIDense_Binary"
571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
572 {
573   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
574   PetscErrorCode    ierr;
575   PetscViewerFormat format;
576   int               fd;
577   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
578   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
579   PetscScalar       *work,*v,*vv;
580   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
581   MPI_Status        status;
582 
583   PetscFunctionBegin;
584   if (mdn->size == 1) {
585     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
586   } else {
587     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
588     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
589     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
590 
591     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
592     if (format == PETSC_VIEWER_BINARY_NATIVE) {
593 
594       if (!rank) {
595         /* store the matrix as a dense matrix */
596         header[0] = MAT_FILE_COOKIE;
597         header[1] = mat->rmap.N;
598         header[2] = N;
599         header[3] = MATRIX_BINARY_FORMAT_DENSE;
600         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
601 
602         /* get largest work array needed for transposing array */
603         mmax = mat->rmap.n;
604         for (i=1; i<size; i++) {
605           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
606         }
607         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
608 
609         /* write out local array, by rows */
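        /* a->v is column major with m local rows; transpose it into work[] so that each
           row is contiguous, since the binary format stores the dense matrix by rows */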
610         m    = mat->rmap.n;
611         v    = a->v;
612         for (j=0; j<N; j++) {
613           for (i=0; i<m; i++) {
614             work[j + i*N] = *v++;
615           }
616         }
617         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
618         /* get the largest work array needed to receive messages from the other processes (excluding process zero) */
619         mmax = 0;
620         for (i=1; i<size; i++) {
621           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
622         }
623         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
624         for(k = 1; k < size; k++) {
625           v    = vv;
626           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
627           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
628 
629           for(j = 0; j < N; j++) {
630             for(i = 0; i < m; i++) {
631               work[j + i*N] = *v++;
632             }
633           }
634           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
635         }
636         ierr = PetscFree(work);CHKERRQ(ierr);
637         ierr = PetscFree(vv);CHKERRQ(ierr);
638       } else {
639         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
640       }
641     } else {
642       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE)");
643     }
644   }
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
650 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
651 {
652   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
653   PetscErrorCode    ierr;
654   PetscMPIInt       size = mdn->size,rank = mdn->rank;
655   PetscViewerType   vtype;
656   PetscTruth        iascii,isdraw;
657   PetscViewer       sviewer;
658   PetscViewerFormat format;
659 #if defined(PETSC_HAVE_PLAPACK)
660   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
661 #endif
662 
663   PetscFunctionBegin;
664   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
666   if (iascii) {
667     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
668     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
669     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
670       MatInfo info;
671       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
672       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
673                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
674       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
675       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
676 #if defined(PETSC_HAVE_PLAPACK)
677       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
681       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
682 #endif
683       PetscFunctionReturn(0);
684     } else if (format == PETSC_VIEWER_ASCII_INFO) {
685       PetscFunctionReturn(0);
686     }
687   } else if (isdraw) {
688     PetscDraw  draw;
689     PetscTruth isnull;
690 
691     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
692     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
693     if (isnull) PetscFunctionReturn(0);
694   }
695 
696   if (size == 1) {
697     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
698   } else {
699     /* assemble the entire matrix onto first processor. */
700     Mat         A;
701     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
702     PetscInt    *cols;
703     PetscScalar *vals;
704 
705     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
706     if (!rank) {
707       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
708     } else {
709       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
710     }
711     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
712     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
713     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
714     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
715 
716     /* Copy the matrix ... This isn't the most efficient means,
717        but it's quick for now */
718     A->insertmode = INSERT_VALUES;
719     row = mat->rmap.rstart; m = mdn->A->rmap.n;
720     for (i=0; i<m; i++) {
721       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
722       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
723       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
724       row++;
725     }
726 
727     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
728     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
729     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
730     if (!rank) {
731       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
732     }
733     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
734     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
735     ierr = MatDestroy(A);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIDense"
742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
743 {
744   PetscErrorCode ierr;
745   PetscTruth     iascii,isbinary,isdraw,issocket;
746 
747   PetscFunctionBegin;
748 
749   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
753 
754   if (iascii || issocket || isdraw) {
755     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
756   } else if (isbinary) {
757     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
758   } else {
759     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetInfo_MPIDense"
766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767 {
768   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769   Mat            mdn = mat->A;
770   PetscErrorCode ierr;
771   PetscReal      isend[5],irecv[5];
772 
773   PetscFunctionBegin;
774   info->rows_global    = (double)A->rmap.N;
775   info->columns_global = (double)A->cmap.N;
776   info->rows_local     = (double)A->rmap.n;
777   info->columns_local  = (double)A->cmap.N;
778   info->block_size     = 1.0;
779   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
780   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
781   isend[3] = info->memory;  isend[4] = info->mallocs;
782   if (flag == MAT_LOCAL) {
783     info->nz_used      = isend[0];
784     info->nz_allocated = isend[1];
785     info->nz_unneeded  = isend[2];
786     info->memory       = isend[3];
787     info->mallocs      = isend[4];
788   } else if (flag == MAT_GLOBAL_MAX) {
789     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
790     info->nz_used      = irecv[0];
791     info->nz_allocated = irecv[1];
792     info->nz_unneeded  = irecv[2];
793     info->memory       = irecv[3];
794     info->mallocs      = irecv[4];
795   } else if (flag == MAT_GLOBAL_SUM) {
796     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
797     info->nz_used      = irecv[0];
798     info->nz_allocated = irecv[1];
799     info->nz_unneeded  = irecv[2];
800     info->memory       = irecv[3];
801     info->mallocs      = irecv[4];
802   }
803   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
804   info->fill_ratio_needed = 0;
805   info->factor_mallocs    = 0;
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatSetOption_MPIDense"
811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
812 {
813   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   switch (op) {
818   case MAT_NEW_NONZERO_LOCATIONS:
819   case MAT_NEW_NONZERO_LOCATION_ERR:
820   case MAT_NEW_NONZERO_ALLOCATION_ERR:
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_ROW_ORIENTED:
824     a->roworiented = flg;
825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
826     break;
827   case MAT_NEW_DIAGONALS:
828   case MAT_USE_HASH_TABLE:
829     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
830     break;
831   case MAT_IGNORE_OFF_PROC_ENTRIES:
832     a->donotstash = flg;
833     break;
834   case MAT_SYMMETRIC:
835   case MAT_STRUCTURALLY_SYMMETRIC:
836   case MAT_HERMITIAN:
837   case MAT_SYMMETRY_ETERNAL:
838   case MAT_IGNORE_LOWER_TRIANGULAR:
839     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
840     break;
841   default:
842     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatDiagonalScale_MPIDense"
850 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
851 {
852   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
853   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
854   PetscScalar    *l,*r,x,*v;
855   PetscErrorCode ierr;
856   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
857 
858   PetscFunctionBegin;
859   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
860   if (ll) {
861     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
862     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
863     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
864     for (i=0; i<m; i++) {
865       x = l[i];
866       v = mat->v + i;
867       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
868     }
869     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
870     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
871   }
872   if (rr) {
873     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
874     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
875     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
878     for (i=0; i<n; i++) {
879       x = r[i];
880       v = mat->v + i*m;
881       for (j=0; j<m; j++) { (*v++) *= x;}
882     }
883     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
884     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatNorm_MPIDense"
891 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
892 {
893   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
894   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
895   PetscErrorCode ierr;
896   PetscInt       i,j;
897   PetscReal      sum = 0.0;
898   PetscScalar    *v = mat->v;
899 
900   PetscFunctionBegin;
901   if (mdn->size == 1) {
902     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
903   } else {
904     if (type == NORM_FROBENIUS) {
905       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
906 #if defined(PETSC_USE_COMPLEX)
907         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
908 #else
909         sum += (*v)*(*v); v++;
910 #endif
911       }
912       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
913       *nrm = sqrt(*nrm);
914       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
915     } else if (type == NORM_1) {
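      /* NORM_1 is the maximum absolute column sum: accumulate local column sums in tmp[],
         reduce with MPI_SUM across processes, then take the maximum over all columns */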
916       PetscReal *tmp,*tmp2;
917       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
918       tmp2 = tmp + A->cmap.N;
919       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
920       *nrm = 0.0;
921       v = mat->v;
922       for (j=0; j<mdn->A->cmap.n; j++) {
923         for (i=0; i<mdn->A->rmap.n; i++) {
924           tmp[j] += PetscAbsScalar(*v);  v++;
925         }
926       }
927       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
928       for (j=0; j<A->cmap.N; j++) {
929         if (tmp2[j] > *nrm) *nrm = tmp2[j];
930       }
931       ierr = PetscFree(tmp);CHKERRQ(ierr);
932       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
933     } else if (type == NORM_INFINITY) { /* max row norm */
934       PetscReal ntemp;
935       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
936       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
937     } else {
938       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
939     }
940   }
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "MatTranspose_MPIDense"
946 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
947 {
948   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
949   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
950   Mat            B;
951   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
952   PetscErrorCode ierr;
953   PetscInt       j,i;
954   PetscScalar    *v;
955 
956   PetscFunctionBegin;
957   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
958   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
959     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
960     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
961     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
962     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
963   } else {
964     B = *matout;
965   }
966 
967   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
968   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
969   for (i=0; i<m; i++) rwork[i] = rstart + i;
970   for (j=0; j<n; j++) {
971     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
972     v   += m;
973   }
974   ierr = PetscFree(rwork);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
978     *matout = B;
979   } else {
980     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
981   }
982   PetscFunctionReturn(0);
983 }
984 
985 #include "petscblaslapack.h"
986 #undef __FUNCT__
987 #define __FUNCT__ "MatScale_MPIDense"
988 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
989 {
990   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
991   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
992   PetscScalar    oalpha = alpha;
993   PetscErrorCode ierr;
994   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
995 
996   PetscFunctionBegin;
997   BLASscal_(&nz,&oalpha,a->v,&one);
998   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1006 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1007 {
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #if defined(PETSC_HAVE_PLAPACK)
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatMPIDenseCreatePlapack"
1019 PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
1020 {
1021   Mat_Plapack    *lu;
1022   PetscErrorCode ierr;
1023   PetscInt       M=A->rmap.N;
1024   MPI_Comm       comm=((PetscObject)A)->comm;
1025   PetscMPIInt    size;
1026 
1027   PetscFunctionBegin;
1028   if (A->spptr) PetscFunctionReturn(0);
1029   ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
1030   A->spptr = (void*)lu;
1031 
1032   lu = (Mat_Plapack*)(A->spptr);
1033 
1034   /* Set default Plapack parameters */
1035   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1036   lu->nb     = M/size;
1037   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1038 
1039   /* Set runtime options */
1040   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1041     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1042   PetscOptionsEnd();
1043 
1044   /* Create object distribution template */
1045   lu->templ = NULL;
1046   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1047 
1048   /* Set the datatype */
1049 #if defined(PETSC_USE_COMPLEX)
1050   lu->datatype = MPI_DOUBLE_COMPLEX;
1051 #else
1052   lu->datatype = MPI_DOUBLE;
1053 #endif
1054 
1055   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1056 
1057 
1058   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1064 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1065 {
1066   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1067   PetscErrorCode ierr;
1068   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1069   PetscScalar    *array;
1070   PetscReal      one = 1.0;
1071 
1072   PetscFunctionBegin;
1073   /* Copy A into F->lu->A */
1074   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1075   ierr = PLA_API_begin();CHKERRQ(ierr);
1076   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1077   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1078   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1079   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1080   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1081   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1082   ierr = PLA_API_end();CHKERRQ(ierr);
1083   lu->rstart = rstart;
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1089 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1090 {
1091   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1092   PetscErrorCode ierr;
1093   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1094   PetscScalar    *array;
1095   PetscReal      one = 1.0;
1096 
1097   PetscFunctionBegin;
1098   /* Copy the PLAPACK matrix stored in F (lu->A) back into A */
1099   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1100   ierr = PLA_API_begin();CHKERRQ(ierr);
1101   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1102   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1103   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1104   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1105   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1106   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1107   ierr = PLA_API_end();CHKERRQ(ierr);
1108   lu->rstart = rstart;
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1114 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1115 {
1116   PetscErrorCode ierr;
1117   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1118   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1119   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1120   PLA_Obj        alpha = NULL,beta = NULL;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1124   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1125 
1126   /*
1127   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1128   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1129   */
1130 
1131   /* do the multiply in PLA  */
1132   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1133   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1134   CHKMEMQ;
1135 
1136   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1137   CHKMEMQ;
1138   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1139   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1140 
1141   /*
1142   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1143   */
1144   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1150 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1151 {
1152   PetscErrorCode ierr;
1153   PetscInt       m=A->rmap.n,n=B->cmap.n;
1154   Mat            Cmat;
1155 
1156   PetscFunctionBegin;
1157   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1158   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1159   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1160   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1161   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1162   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1163   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1164   ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
1165   ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
1166   ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
1167   *C = Cmat;
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1173 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (scall == MAT_INITIAL_MATRIX){
1179     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1180   }
1181   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatSolve_MPIDense"
1188 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1189 {
1190   MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
1191   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1192   PetscErrorCode ierr;
1193   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1194   PetscScalar    *array;
1195   PetscReal      one = 1.0;
1196   PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
1197   PLA_Obj        v_pla = NULL;
1198   PetscScalar    *loc_buf;
1199   Vec            loc_x;
1200 
1201   PetscFunctionBegin;
1202   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1203 
1204   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1205   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1206   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1207 
1208   /* Copy b into rhs_pla */
1209   ierr = PLA_API_begin();CHKERRQ(ierr);
1210   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1211   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1212   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1213   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1214   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1215   ierr = PLA_API_end();CHKERRQ(ierr);
1216 
1217   if (A->factor == FACTOR_LU){
1218     /* Apply the permutations to the right hand sides */
1219     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1220 
1221     /* Solve L y = b, overwriting b with y */
1222     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1223 
1224     /* Solve U x = y (=b), overwriting b with x */
1225     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1226   } else { /* FACTOR_CHOLESKY */
1227     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1228     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1229   }
1230 
1231   /* Copy PLAPACK x into Petsc vector x  */
1232   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1233   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1234   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1235   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1236   if (!lu->pla_solved){
1237 
1238     ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
1239     ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
1240     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1241 
1242     /* Create index sets and the scatter context for the VecScatter */
1243     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1244     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1245     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1246     idx_petsc = idx_pla + loc_m;
1247 
1248     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1249     for (i=0; i<loc_m; i+=lu->nb){
1250       j = 0;
1251       while (j < lu->nb && i+j < loc_m){
1252         idx_petsc[i+j] = rstart + j; j++;
1253       }
1254       rstart += size*lu->nb;
1255     }
1256 
1257     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1258 
1259     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1260     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1261     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1262     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1263   }
1264   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266 
1267   /* Free data */
1268   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1269   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1270 
1271   lu->pla_solved = PETSC_TRUE;
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1277 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1278 {
1279   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1280   PetscErrorCode ierr;
1281   PetscInt       info_pla;
1282 
1283   PetscFunctionBegin;
1284   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1285 
1286   /* Copy A into F->lu->A */
1287   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1288 
1289   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1290   info_pla = PLA_LU(lu->A,lu->pivots);
1291   if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1292 
1293   (*F)->assembled    = PETSC_TRUE;
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1299 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1300 {
1301   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1302   PetscErrorCode ierr;
1303   PetscInt       info_pla;
1304 
1305   PetscFunctionBegin;
1306 
1307   /* Copy A into F->lu->A */
1308   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1309 
1310   /* Cholesky factor A, overwriting the lower triangle of A with the factor L */
1311   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1312   if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1313 
1314   (*F)->assembled = PETSC_TRUE;
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1321 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1322 {
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   /* Create the factorization matrix */
1327   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1328   ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1329   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1330   ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 /* Note the Petsc r and c permutations are ignored */
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1337 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1338 {
1339   PetscErrorCode ierr;
1340   PetscInt       M = A->rmap.N;
1341   Mat_Plapack    *lu;
1342 
1343   PetscFunctionBegin;
1344   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1345   lu = (Mat_Plapack*)(*F)->spptr;
1346   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1347   (*F)->factor = FACTOR_LU;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 /* Note the Petsc perm permutation is ignored */
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1354 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1355 {
1356   PetscErrorCode ierr;
1357   PetscTruth     issymmetric,set;
1358 
1359   PetscFunctionBegin;
1360   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1361   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1362   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1363   (*F)->factor = FACTOR_CHOLESKY;
1364   PetscFunctionReturn(0);
1365 }
1366 #endif
1367 
1368 /* -------------------------------------------------------------------*/
1369 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1370        MatGetRow_MPIDense,
1371        MatRestoreRow_MPIDense,
1372        MatMult_MPIDense,
1373 /* 4*/ MatMultAdd_MPIDense,
1374        MatMultTranspose_MPIDense,
1375        MatMultTransposeAdd_MPIDense,
1376 #if defined(PETSC_HAVE_PLAPACK)
1377        MatSolve_MPIDense,
1378 #else
1379        0,
1380 #endif
1381        0,
1382        0,
1383 /*10*/ 0,
1384        0,
1385        0,
1386        0,
1387        MatTranspose_MPIDense,
1388 /*15*/ MatGetInfo_MPIDense,
1389        MatEqual_MPIDense,
1390        MatGetDiagonal_MPIDense,
1391        MatDiagonalScale_MPIDense,
1392        MatNorm_MPIDense,
1393 /*20*/ MatAssemblyBegin_MPIDense,
1394        MatAssemblyEnd_MPIDense,
1395        0,
1396        MatSetOption_MPIDense,
1397        MatZeroEntries_MPIDense,
1398 /*25*/ MatZeroRows_MPIDense,
1399 #if defined(PETSC_HAVE_PLAPACK)
1400        MatLUFactorSymbolic_MPIDense,
1401        MatLUFactorNumeric_MPIDense,
1402        MatCholeskyFactorSymbolic_MPIDense,
1403        MatCholeskyFactorNumeric_MPIDense,
1404 #else
1405        0,
1406        0,
1407        0,
1408        0,
1409 #endif
1410 /*30*/ MatSetUpPreallocation_MPIDense,
1411        0,
1412        0,
1413        MatGetArray_MPIDense,
1414        MatRestoreArray_MPIDense,
1415 /*35*/ MatDuplicate_MPIDense,
1416        0,
1417        0,
1418        0,
1419        0,
1420 /*40*/ 0,
1421        MatGetSubMatrices_MPIDense,
1422        0,
1423        MatGetValues_MPIDense,
1424        0,
1425 /*45*/ 0,
1426        MatScale_MPIDense,
1427        0,
1428        0,
1429        0,
1430 /*50*/ 0,
1431        0,
1432        0,
1433        0,
1434        0,
1435 /*55*/ 0,
1436        0,
1437        0,
1438        0,
1439        0,
1440 /*60*/ MatGetSubMatrix_MPIDense,
1441        MatDestroy_MPIDense,
1442        MatView_MPIDense,
1443        0,
1444        0,
1445 /*65*/ 0,
1446        0,
1447        0,
1448        0,
1449        0,
1450 /*70*/ 0,
1451        0,
1452        0,
1453        0,
1454        0,
1455 /*75*/ 0,
1456        0,
1457        0,
1458        0,
1459        0,
1460 /*80*/ 0,
1461        0,
1462        0,
1463        0,
1464 /*84*/ MatLoad_MPIDense,
1465        0,
1466        0,
1467        0,
1468        0,
1469        0,
1470 /*90*/
1471 #if defined(PETSC_HAVE_PLAPACK)
1472        MatMatMult_MPIDense_MPIDense,
1473        MatMatMultSymbolic_MPIDense_MPIDense,
1474        MatMatMultNumeric_MPIDense_MPIDense,
1475 #else
1476        0,
1477        0,
1478        0,
1479 #endif
1480        0,
1481 /*95*/ 0,
1482        0,
1483        0,
1484        0};
1485 
1486 EXTERN_C_BEGIN
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1489 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1490 {
1491   Mat_MPIDense   *a;
1492   PetscErrorCode ierr;
1493 
1494   PetscFunctionBegin;
1495   mat->preallocated = PETSC_TRUE;
1496   /* Note: for now, when data is provided we assume the user has correctly
1497      allocated the local dense storage space; error checking should be added. */
1498 
1499   a    = (Mat_MPIDense*)mat->data;
1500   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1501   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1502   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1503   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1504   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 EXTERN_C_END
1508 
1509 EXTERN_C_BEGIN
1510 #if defined(PETSC_HAVE_PLAPACK)
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1513 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1514 {
1515   PetscFunctionBegin;
1516   /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") silently works (since PLAPACK is on by default) */
1517   PetscFunctionReturn(0);
1518 }
1519 #endif
1520 EXTERN_C_END
1521 
1522 /*MC
1523    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1524 
1525    Options Database Keys:
1526 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1527 
1528   Level: beginner
1529 
1530   MATMPIDENSE matrices may use the parallel direct solvers (LU and Cholesky) provided by
1531   the external package PLAPACK, if PLAPACK is installed (run config/configure.py with the
1532   option --download-plapack). A usage sketch is given below.
1533 
1534 
1535   Options Database Keys when PLAPACK is used:
1536 . -mat_plapack_nprows <n> - number of rows in processor partition
1537 . -mat_plapack_npcols <n> - number of columns in processor partition
1538 . -mat_plapack_nb <n> - block size of template vector
1539 . -mat_plapack_nb_alg <n> - algorithmic block size
1540 - -mat_plapack_ckerror <n> - error checking flag
1541 
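  Example usage - a minimal sketch (the global dimensions M and N are assumed to be defined;
  error checking with CHKERRQ() is omitted for brevity):
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,PETSC_NULL);   /* PETSc allocates the local storage */
     /* ... fill entries with MatSetValues() ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
  When PETSc is configured with PLAPACK, the LU and Cholesky factorizations listed above are
  then used for this matrix type by the standard PETSc direct solver interfaces.
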
1542 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1543 M*/
1544 
1545 EXTERN_C_BEGIN
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatCreate_MPIDense"
1548 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1549 {
1550   Mat_MPIDense   *a;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1555   mat->data         = (void*)a;
1556   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1557   mat->factor       = 0;
1558   mat->mapping      = 0;
1559 
1560   mat->insertmode = NOT_SET_VALUES;
1561   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1562   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1563 
1564   mat->rmap.bs = mat->cmap.bs = 1;
1565   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1566   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1567   a->nvec = mat->cmap.n;
1568 
1569   /* build cache for off array entries formed */
1570   a->donotstash = PETSC_FALSE;
1571   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1572 
1573   /* stuff used for matrix vector multiply */
1574   a->lvec        = 0;
1575   a->Mvctx       = 0;
1576   a->roworiented = PETSC_TRUE;
1577 
1578   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1579                                      "MatGetDiagonalBlock_MPIDense",
1580                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1582                                      "MatMPIDenseSetPreallocation_MPIDense",
1583                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1585                                      "MatMatMult_MPIAIJ_MPIDense",
1586                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1588                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1589                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1590   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1591                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1592                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1593 #if defined(PETSC_HAVE_PLAPACK)
1594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1595                                      "MatSetSolverType_MPIDense_PLAPACK",
1596                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1597 #endif
1598   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1599 
1600   PetscFunctionReturn(0);
1601 }
1602 EXTERN_C_END
1603 
1604 /*MC
1605    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1606 
1607    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1608    and MATMPIDENSE otherwise.
1609 
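   Example - a minimal sketch showing that the concrete type is chosen from the size of the
   communicator (comm, M and N are assumed to be defined; error checking omitted):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATDENSE);   /* becomes MATSEQDENSE on one process, MATMPIDENSE otherwise */
.ve
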
1610    Options Database Keys:
1611 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1612 
1613   Level: beginner
1614 
1615 
1616 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1617 M*/
1618 
1619 EXTERN_C_BEGIN
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatCreate_Dense"
1622 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1623 {
1624   PetscErrorCode ierr;
1625   PetscMPIInt    size;
1626 
1627   PetscFunctionBegin;
1628   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1629   if (size == 1) {
1630     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1631   } else {
1632     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 EXTERN_C_END
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1640 /*@C
1641    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1642 
1643    Not collective
1644 
1645    Input Parameters:
1646 +  A - the matrix
1647 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1648    to control all matrix memory allocation.
1649 
1650    Notes:
1651    The dense format is fully compatible with standard Fortran 77
1652    storage by columns.
1653 
1654    The data input variable is intended primarily for Fortran programmers
1655    who wish to allocate their own matrix memory space.  Most users should
1656    set data=PETSC_NULL.
1657 
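   Example - a sketch of supplying your own storage from C (mlocal denotes the number of
   local rows and N the number of global columns, both assumed to be defined; error
   checking with CHKERRQ() is omitted):
.vb
     Mat         A;
     PetscScalar *data;
     PetscMalloc(mlocal*N*sizeof(PetscScalar),&data); /* column-major, leading dimension mlocal */
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,mlocal,PETSC_DECIDE,PETSC_DECIDE,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,data);
.ve
   Passing data=PETSC_NULL instead lets PETSc allocate this storage itself.
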
1658    Level: intermediate
1659 
1660 .keywords: matrix,dense, parallel
1661 
1662 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1663 @*/
1664 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1665 {
1666   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1667 
1668   PetscFunctionBegin;
1669   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1670   if (f) {
1671     ierr = (*f)(mat,data);CHKERRQ(ierr);
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatCreateMPIDense"
1678 /*@C
1679    MatCreateMPIDense - Creates a parallel matrix in dense format.
1680 
1681    Collective on MPI_Comm
1682 
1683    Input Parameters:
1684 +  comm - MPI communicator
1685 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1686 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1687 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1688 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1689 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1690    to control all matrix memory allocation.
1691 
1692    Output Parameter:
1693 .  A - the matrix
1694 
1695    Notes:
1696    The dense format is fully compatible with standard Fortran 77
1697    storage by columns.
1698 
1699    The data input variable is intended primarily for Fortran programmers
1700    who wish to allocate their own matrix memory space.  Most users should
1701    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1702 
1703    The user MUST specify either the local or global matrix dimensions
1704    (possibly both).
1705 
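   Example - a minimal sketch (the global dimensions M and N are assumed to be defined;
   error checking with CHKERRQ() is omitted):
.vb
     Mat A;
     MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
     MatDestroy(A);
.ve
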
1706    Level: intermediate
1707 
1708 .keywords: matrix,dense, parallel
1709 
1710 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1711 @*/
1712 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1713 {
1714   PetscErrorCode ierr;
1715   PetscMPIInt    size;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1719   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1720   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1721   if (size > 1) {
1722     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1723     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1724   } else {
1725     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1726     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1727   }
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatDuplicate_MPIDense"
1733 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1734 {
1735   Mat            mat;
1736   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   *newmat       = 0;
1741   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1742   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1743   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1744   a                 = (Mat_MPIDense*)mat->data;
1745   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1746   mat->factor       = A->factor;
1747   mat->assembled    = PETSC_TRUE;
1748   mat->preallocated = PETSC_TRUE;
1749 
1750   mat->rmap.rstart     = A->rmap.rstart;
1751   mat->rmap.rend       = A->rmap.rend;
1752   a->size              = oldmat->size;
1753   a->rank              = oldmat->rank;
1754   mat->insertmode      = NOT_SET_VALUES;
1755   a->nvec              = oldmat->nvec;
1756   a->donotstash        = oldmat->donotstash;
1757 
1758   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1759   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1760   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1761 
1762   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1763   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1764   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1765 
1766 #if defined(PETSC_HAVE_PLAPACK)
1767   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1768 #endif
1769   *newmat = mat;
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 #include "petscsys.h"
1774 
1775 #undef __FUNCT__
1776 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1777 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1778 {
1779   PetscErrorCode ierr;
1780   PetscMPIInt    rank,size;
1781   PetscInt       *rowners,i,m,nz,j;
1782   PetscScalar    *array,*vals,*vals_ptr;
1783   MPI_Status     status;
1784 
1785   PetscFunctionBegin;
1786   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1788 
1789   /* determine ownership of all rows */
1790   m          = M/size + ((M % size) > rank);
1791   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1792   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1793   rowners[0] = 0;
1794   for (i=2; i<=size; i++) {
1795     rowners[i] += rowners[i-1];
1796   }
1797 
1798   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1799   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1800   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1801   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1802   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1803 
1804   if (!rank) {
1805     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1806 
1807     /* read in my part of the matrix numerical values  */
1808     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1809 
1810     /* insert into matrix by row (this is why we cannot read directly into array) */
1811     vals_ptr = vals;
1812     for (i=0; i<m; i++) {
1813       for (j=0; j<N; j++) {
1814         array[i + j*m] = *vals_ptr++;
1815       }
1816     }
1817 
1818     /* read in other processors and ship out */
1819     for (i=1; i<size; i++) {
1820       nz   = (rowners[i+1] - rowners[i])*N;
1821       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1822       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1823     }
1824   } else {
1825     /* receive numeric values */
1826     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1827 
1828     /* receive message of values*/
1829     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1830 
1831     /* insert into matrix by row (this is why we cannot read directly into array) */
1832     vals_ptr = vals;
1833     for (i=0; i<m; i++) {
1834       for (j=0; j<N; j++) {
1835         array[i + j*m] = *vals_ptr++;
1836       }
1837     }
1838   }
1839   ierr = PetscFree(rowners);CHKERRQ(ierr);
1840   ierr = PetscFree(vals);CHKERRQ(ierr);
1841   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1842   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatLoad_MPIDense"
1848 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1849 {
1850   Mat            A;
1851   PetscScalar    *vals,*svals;
1852   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1853   MPI_Status     status;
1854   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1855   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1856   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1857   PetscInt       i,nz,j,rstart,rend;
1858   int            fd;
1859   PetscErrorCode ierr;
1860 
1861   PetscFunctionBegin;
1862   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1863   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1864   if (!rank) {
1865     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1866     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1867     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1868   }
1869 
1870   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1871   M = header[1]; N = header[2]; nz = header[3];
1872 
1873   /*
1874        Handle case where matrix is stored on disk as a dense matrix
1875   */
1876   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1877     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1878     PetscFunctionReturn(0);
1879   }
1880 
1881   /* determine ownership of all rows */
1882   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1883   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1884   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1885   rowners[0] = 0;
1886   for (i=2; i<=size; i++) {
1887     rowners[i] += rowners[i-1];
1888   }
1889   rstart = rowners[rank];
1890   rend   = rowners[rank+1];
1891 
1892   /* distribute row lengths to all processors */
1893   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1894   offlens = ourlens + (rend-rstart);
1895   if (!rank) {
1896     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1897     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1898     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1899     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1900     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1901     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1902   } else {
1903     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1904   }
1905 
1906   if (!rank) {
1907     /* calculate the number of nonzeros on each processor */
1908     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1909     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1910     for (i=0; i<size; i++) {
1911       for (j=rowners[i]; j< rowners[i+1]; j++) {
1912         procsnz[i] += rowlengths[j];
1913       }
1914     }
1915     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1916 
1917     /* determine max buffer needed and allocate it */
1918     maxnz = 0;
1919     for (i=0; i<size; i++) {
1920       maxnz = PetscMax(maxnz,procsnz[i]);
1921     }
1922     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1923 
1924     /* read in my part of the matrix column indices  */
1925     nz = procsnz[0];
1926     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1927     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1928 
1929     /* read in everyone else's part and ship it off */
1930     for (i=1; i<size; i++) {
1931       nz   = procsnz[i];
1932       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1933       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1934     }
1935     ierr = PetscFree(cols);CHKERRQ(ierr);
1936   } else {
1937     /* determine buffer space needed for message */
1938     nz = 0;
1939     for (i=0; i<m; i++) {
1940       nz += ourlens[i];
1941     }
1942     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1943 
1944     /* receive message of column indices*/
1945     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1946     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1947     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1948   }
1949 
1950   /* loop over local rows, determining number of off diagonal entries */
1951   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1952   jj = 0;
1953   for (i=0; i<m; i++) {
1954     for (j=0; j<ourlens[i]; j++) {
1955       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1956       jj++;
1957     }
1958   }
1959 
1960   /* create our matrix */
1961   for (i=0; i<m; i++) {
1962     ourlens[i] -= offlens[i];
1963   }
1964   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1965   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1966   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1967   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1968   A = *newmat;
1969   for (i=0; i<m; i++) {
1970     ourlens[i] += offlens[i];
1971   }
1972 
1973   if (!rank) {
1974     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1975 
1976     /* read in my part of the matrix numerical values  */
1977     nz = procsnz[0];
1978     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1979 
1980     /* insert into matrix */
1981     jj      = rstart;
1982     smycols = mycols;
1983     svals   = vals;
1984     for (i=0; i<m; i++) {
1985       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1986       smycols += ourlens[i];
1987       svals   += ourlens[i];
1988       jj++;
1989     }
1990 
1991     /* read in other processors and ship out */
1992     for (i=1; i<size; i++) {
1993       nz   = procsnz[i];
1994       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1995       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1996     }
1997     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1998   } else {
1999     /* receive numeric values */
2000     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2001 
2002     /* receive message of values*/
2003     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2004     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2005     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2006 
2007     /* insert into matrix */
2008     jj      = rstart;
2009     smycols = mycols;
2010     svals   = vals;
2011     for (i=0; i<m; i++) {
2012       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2013       smycols += ourlens[i];
2014       svals   += ourlens[i];
2015       jj++;
2016     }
2017   }
2018   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2019   ierr = PetscFree(vals);CHKERRQ(ierr);
2020   ierr = PetscFree(mycols);CHKERRQ(ierr);
2021   ierr = PetscFree(rowners);CHKERRQ(ierr);
2022 
2023   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatEqual_MPIDense"
2030 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2031 {
2032   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2033   Mat            a,b;
2034   PetscTruth     flg;
2035   PetscErrorCode ierr;
2036 
2037   PetscFunctionBegin;
2038   a = matA->A;
2039   b = matB->A;
2040   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2041   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 #if defined(PETSC_HAVE_PLAPACK)
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2049 /*@C
2050   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2051   Level: developer
2052 
2053 .keywords: Petsc, destroy, package, PLAPACK
2054 .seealso: PetscFinalize()
2055 @*/
2056 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2057 {
2058   PetscErrorCode ierr;
2059 
2060   PetscFunctionBegin;
2061   ierr = PLA_Finalize();CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2067 /*@C
2068   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2069   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
2070   when using static libraries.
2071 
2072   Input Parameter:
2073 .  path - The dynamic library path, or PETSC_NULL
2074 
2075   Level: developer
2076 
2077 .keywords: Petsc, initialize, package, PLAPACK
2078 .seealso: PetscInitializePackage(), PetscInitialize()
2079 @*/
2080 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
2081 {
2082   MPI_Comm       comm = PETSC_COMM_WORLD;
2083   PetscMPIInt    size;
2084   PetscErrorCode ierr;
2085 
2086   PetscFunctionBegin;
2087   if (!PLA_Initialized(PETSC_NULL)) {
2088 
2089     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2090     Plapack_nprows = 1;
2091     Plapack_npcols = size;
2092 
2093     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2094       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2095       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2096 #if defined(PETSC_USE_DEBUG)
2097       Plapack_ierror = 3;
2098 #else
2099       Plapack_ierror = 0;
2100 #endif
2101       ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2102       if (Plapack_ierror) {
2103         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2104       } else {
2105         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2106       }
2107 
2108       Plapack_nb_alg = 0;
2109       ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2110       if (Plapack_nb_alg) {
2111         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2112       }
2113     PetscOptionsEnd();
2114 
2115     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2116     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2117     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 
2123 #endif
2124