xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 28adb3f739167aae2a3fdf9bf501dbd0150de1de)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
19               matrix that represents the operator. For sequential matrices it returns itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   Builds a sequential dense matrix for this process's diagonal block
   (rows rstart..rstart+m-1 and the matching columns).  The new matrix
   shares the local storage of mdn->A rather than copying it.
   NOTE(review): *iscopy is reported as PETSC_TRUE although the array is
   shared -- confirm the intended ownership semantics with callers.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  /* point at column rstart of the local array (offset m*rstart assumes the
     local leading dimension equals m -- TODO confirm for padded layouts) */
  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/*
   Inserts/adds an m-by-n block of values.  Locally owned rows go straight
   into the sequential matrix; off-process rows are stashed and exchanged
   during MatAssemblyBegin/End() (unless donotstash is set).
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global -> local row index */
      if (roworiented) {
        /* row-oriented input: row i of the block starts at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) lives at v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* row owned elsewhere: queue for communication at assembly time */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
141 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIDense"
/*
   Retrieves individual entries.  Unlike MatSetValues there is no
   communication: only rows owned by this process may be requested.
*/
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global -> local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) {
          SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        }
        /* output block is row-oriented: entry (i,j) goes to v[i*n+j] */
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
190   const PetscInt *irow,*icol;
191   PetscScalar    *av,*bv,*v = lmat->v;
192   Mat            newmat;
193   IS             iscol_local;
194 
195   PetscFunctionBegin;
196   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
197   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
198   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
199   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
200   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
201   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
202 
203   /* No parallel redistribution currently supported! Should really check each index set
204      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
205      original matrix! */
206 
207   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
208   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
209 
210   /* Check submatrix call */
211   if (scall == MAT_REUSE_MATRIX) {
212     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
213     /* Really need to test rows and column sizes! */
214     newmat = *B;
215   } else {
216     /* Create and fill new matrix */
217     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
218     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
219     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
220     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
221   }
222 
223   /* Now extract the data pointers and do the copy, column at a time */
224   newmatd = (Mat_MPIDense*)newmat->data;
225   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
226 
227   for (i=0; i<Ncols; i++) {
228     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
229     for (j=0; j<nrows; j++) {
230       *bv++ = av[irow[j] - rstart];
231     }
232   }
233 
234   /* Assemble the matrices so that the correct flags are set */
235   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
236   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
237 
238   /* Free work space */
239   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
240   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
241   *B = newmat;
242   PetscFunctionReturn(0);
243 }
244 
#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
/*
   Companion to MatGetArray_MPIDense().  Intentionally a no-op: the array
   belongs to the inner sequential matrix, so nothing needs to be copied
   back or freed here.
*/
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
252 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
/*
   Starts communicating stashed off-process entries and verifies that all
   processes used a consistent InsertMode.
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* bitwise-OR of everyone's mode: a mixed result has both bits set */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* begin shipping stashed values to their owning processes */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
276 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   Completes assembly: drains the stash of values destined for this process,
   inserts them locally, assembles the inner sequential matrix, and on the
   first final assembly builds the scatter used by the multiply routines.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break; /* no more incoming messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatZeroEntries_MPIDense"
317 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
318 {
319   PetscErrorCode ierr;
320   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
321 
322   PetscFunctionBegin;
323   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 /* the code does not do the diagonal entries correctly unless the
328    matrix is square and the column and row owerships are identical.
329    This is a BUG. The only way to fix it seems to be to access
330    mdn->A and mdn->B directly and not through the MatZeroRows()
331    routine.
332 */
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/*
   Zeros the given global rows (optionally setting the diagonal to diag).
   Rows may be specified on any process: each index is first routed to its
   owning process with point-to-point messages, then the local rows are
   zapped via the sequential MatZeroRows().
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /*  nprocs[2*j] = #rows destined for proc j; nprocs[2*j+1] = 1 if any */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear scan of the ownership ranges to find the owner of idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  /* number of distinct destination processes */
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    /* pack the row indices grouped by destination process */
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] -- the packing loop above advanced the entries */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]; /* first globally owned row; used to localize indices */

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n; /* running total of local rows to zero */
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base; /* global -> local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
446 
447 #undef __FUNCT__
448 #define __FUNCT__ "MatMult_MPIDense"
449 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
450 {
451   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
452   PetscErrorCode ierr;
453 
454   PetscFunctionBegin;
455   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
456   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
457   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatMultAdd_MPIDense"
463 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
464 {
465   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
466   PetscErrorCode ierr;
467 
468   PetscFunctionBegin;
469   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatMultTranspose_MPIDense"
477 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
478 {
479   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
480   PetscErrorCode ierr;
481   PetscScalar    zero = 0.0;
482 
483   PetscFunctionBegin;
484   ierr = VecSet(yy,zero);CHKERRQ(ierr);
485   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
486   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
487   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
493 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
494 {
495   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
496   PetscErrorCode ierr;
497 
498   PetscFunctionBegin;
499   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
500   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
501   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
502   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/*
   Copies the locally owned part of the diagonal into v.
   NOTE(review): the offset arithmetic assumes a square matrix with
   identical row and column layouts and a local leading dimension equal to
   the local row count -- confirm for non-standard layouts.
*/
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m; /* offset of the diagonal block in the column-major local array */
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
529 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/*
   Destroys the parallel dense matrix: the stash, the inner sequential
   matrix, the communication objects, optional PLAPACK factorization data,
   and the composed method table entries.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  /* lvec/Mvctx may be NULL (they are set up during assembly) */
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the type-specific methods composed onto the object at creation */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
572 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/*
   Writes the matrix to a binary viewer.  With more than one process the
   viewer must be in PETSC_VIEWER_NATIVE format: process 0 writes the
   header and its own rows, then receives each other process's local array
   in rank order, transposes it from column-major storage into row-major
   file order, and appends it.
*/
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m    = mat->rmap->n;
        v    = a->v;
        /* transpose: a->v is column-major, the file stores rows contiguously */
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        for(k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          /* same column-major -> row-major transpose as above */
          for(j = 0; j < N; j++) {
            for(i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks ship their local column-major array to rank 0 */
        ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE");
    }
  }
  PetscFunctionReturn(0);
}
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
654 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
655 {
656   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
657   PetscErrorCode        ierr;
658   PetscMPIInt           size = mdn->size,rank = mdn->rank;
659   const PetscViewerType vtype;
660   PetscTruth            iascii,isdraw;
661   PetscViewer           sviewer;
662   PetscViewerFormat     format;
663 #if defined(PETSC_HAVE_PLAPACK)
664   Mat_Plapack           *lu;
665 #endif
666 
667   PetscFunctionBegin;
668   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
669   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
670   if (iascii) {
671     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
672     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
673     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
674       MatInfo info;
675       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
676       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
677                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
678       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
679 #if defined(PETSC_HAVE_PLAPACK)
680       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
681       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
682       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
683       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
684       if (mat->factor){
685         lu=(Mat_Plapack*)(mat->spptr);
686         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
687       }
688 #else
689       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
690 #endif
691       PetscFunctionReturn(0);
692     } else if (format == PETSC_VIEWER_ASCII_INFO) {
693       PetscFunctionReturn(0);
694     }
695   } else if (isdraw) {
696     PetscDraw  draw;
697     PetscTruth isnull;
698 
699     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
700     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
701     if (isnull) PetscFunctionReturn(0);
702   }
703 
704   if (size == 1) {
705     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
706   } else {
707     /* assemble the entire matrix onto first processor. */
708     Mat         A;
709     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
710     PetscInt    *cols;
711     PetscScalar *vals;
712 
713     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
714     if (!rank) {
715       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
716     } else {
717       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
718     }
719     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
720     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
721     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
722     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
723 
724     /* Copy the matrix ... This isn't the most efficient means,
725        but it's quick for now */
726     A->insertmode = INSERT_VALUES;
727     row = mat->rmap->rstart; m = mdn->A->rmap->n;
728     for (i=0; i<m; i++) {
729       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
730       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
731       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
732       row++;
733     }
734 
735     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
738     if (!rank) {
739       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
740     }
741     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
742     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
743     ierr = MatDestroy(A);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatView_MPIDense"
750 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
751 {
752   PetscErrorCode ierr;
753   PetscTruth     iascii,isbinary,isdraw,issocket;
754 
755   PetscFunctionBegin;
756 
757   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
758   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
759   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
760   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
761 
762   if (iascii || issocket || isdraw) {
763     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
764   } else if (isbinary) {
765     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
766   } else {
767     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
768   }
769   PetscFunctionReturn(0);
770 }
771 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIDense"
/*
   Gathers matrix statistics.  The inner sequential matrix supplies the
   per-process numbers; MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce them over
   the communicator.
*/
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn = mat->A;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
  /* stage the five reducible fields in a contiguous buffer for MPI */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
812 
813 #undef __FUNCT__
814 #define __FUNCT__ "MatSetOption_MPIDense"
815 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
816 {
817   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
818   PetscErrorCode ierr;
819 
820   PetscFunctionBegin;
821   switch (op) {
822   case MAT_NEW_NONZERO_LOCATIONS:
823   case MAT_NEW_NONZERO_LOCATION_ERR:
824   case MAT_NEW_NONZERO_ALLOCATION_ERR:
825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
826     break;
827   case MAT_ROW_ORIENTED:
828     a->roworiented = flg;
829     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
830     break;
831   case MAT_NEW_DIAGONALS:
832   case MAT_USE_HASH_TABLE:
833     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
834     break;
835   case MAT_IGNORE_OFF_PROC_ENTRIES:
836     a->donotstash = flg;
837     break;
838   case MAT_SYMMETRIC:
839   case MAT_STRUCTURALLY_SYMMETRIC:
840   case MAT_HERMITIAN:
841   case MAT_SYMMETRY_ETERNAL:
842   case MAT_IGNORE_LOWER_TRIANGULAR:
843     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
844     break;
845   default:
846     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 
852 #undef __FUNCT__
853 #define __FUNCT__ "MatDiagonalScale_MPIDense"
854 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
855 {
856   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
857   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
858   PetscScalar    *l,*r,x,*v;
859   PetscErrorCode ierr;
860   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
861 
862   PetscFunctionBegin;
863   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
864   if (ll) {
865     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
866     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
867     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
868     for (i=0; i<m; i++) {
869       x = l[i];
870       v = mat->v + i;
871       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
872     }
873     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
874     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
875   }
876   if (rr) {
877     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
878     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
879     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
882     for (i=0; i<n; i++) {
883       x = r[i];
884       v = mat->v + i*m;
885       for (j=0; j<m; j++) { (*v++) *= x;}
886     }
887     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
888     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
889   }
890   PetscFunctionReturn(0);
891 }
892 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
/*
   MatNorm_MPIDense - Computes the Frobenius, 1, or infinity norm of a
   parallel dense matrix.  With one process the sequential norm is used
   directly; otherwise local contributions are combined via MPI_Allreduce.
   The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;  /* local dense array, column-major */

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over the local block, global sum, then sqrt */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* 1-norm: per-column absolute sums accumulated locally (tmp),
         summed across processes into tmp2, then maximized over columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap->N;  /* second half of the single allocation */
      ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so take the max of local norms */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
947 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
/*
   MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix by
   inserting each contiguous local column as a (stashed) row of the result
   and assembling.  In-place transposition (A == *matout with
   MAT_REUSE_MATRIX) is supported only for square matrices.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    /* build a new matrix with swapped row/column layout; for the in-place
       case it is a temporary whose data is moved into A at the end */
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* local block is column-major, so column j of A is contiguous at v */
  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;  /* global indices of local rows */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: move B's guts into A and destroy the temporary header */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
988 
989 #include "petscblaslapack.h"
990 #undef __FUNCT__
991 #define __FUNCT__ "MatScale_MPIDense"
992 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
993 {
994   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
995   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
996   PetscScalar    oalpha = alpha;
997   PetscErrorCode ierr;
998   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
999 
1000   PetscFunctionBegin;
1001   BLASscal_(&nz,&oalpha,a->v,&one);
1002   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1007 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
/* Default preallocation: let PETSc allocate the local dense array itself
   (no user-provided storage). */
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1018 
1019 #if defined(PETSC_HAVE_PLAPACK)
1020 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
/*
   MatMPIDenseCopyToPlapack - Copies the local rows of the PETSc matrix A
   into the PLAPACK matrix object stored in F->spptr, by zeroing the target
   and axpy-ing the local block with coefficient 1.0 through the PLAPACK API.
*/
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;  /* global columns, local rows */
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into the PLAPACK object lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  /* add the local m x M block into the global PLAPACK matrix at row offset rstart */
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;  /* cached; reused by MatSolve_MPIDense for vector copies */
  PetscFunctionReturn(0);
}
1045 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
/*
   MatMPIDenseCopyFromPlapack - Copies the PLAPACK matrix object stored in
   F->spptr back into the local rows of the PETSc matrix A (the reverse of
   MatMPIDenseCopyToPlapack): A is zeroed and the global PLAPACK data is
   axpy-ed into the local block with coefficient 1.0.
*/
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;  /* global columns, local rows */
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy the PLAPACK object lu->A into the PETSc matrix A */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}
1070 
#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
/*
   MatMatMultNumeric_MPIDense_MPIDense - Computes C = A*B by copying A and B
   into their PLAPACK mirror objects, running PLA_Gemm, and copying the
   product back into the PETSc matrix C.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  /* each matrix is mirrored into the PLAPACK object hanging off its own spptr */
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA  */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  /* return code deliberately unchecked (see the commented-out CHKERRQ) */
  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1106 
#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
/*
   MatMatMultSymbolic_MPIDense_MPIDense - Would create the product matrix C
   for C = A*B.  Currently disabled: the unconditional SETERRQ below always
   returns an error, so the creation code after it is intentionally dead.
*/
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  /* NOTE: unconditional error return disables this routine; code below is unreachable */
  SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported");
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}
1127 
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
/*
   MatMatMult_MPIDense_MPIDense - Computes C = A*B.  With MAT_INITIAL_MATRIX
   the symbolic phase first creates *C; the numeric phase then fills it.
*/
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatSolve_MPIDense"
1144 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1145 {
1146   MPI_Comm       comm = ((PetscObject)A)->comm;
1147   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1148   PetscErrorCode ierr;
1149   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1150   PetscScalar    *array;
1151   PetscReal      one = 1.0;
1152   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;;
1153   PLA_Obj        v_pla = NULL;
1154   PetscScalar    *loc_buf;
1155   Vec            loc_x;
1156 
1157   PetscFunctionBegin;
1158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1159   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1160 
1161   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1162   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1163   PLA_Obj_set_to_zero(v_pla);
1164 
1165   /* Copy b into rhs_pla */
1166   PLA_API_begin();
1167   PLA_Obj_API_open(v_pla);
1168   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1169   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1170   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1171   PLA_Obj_API_close(v_pla);
1172   PLA_API_end();
1173 
1174   if (A->factor == MAT_FACTOR_LU){
1175     /* Apply the permutations to the right hand sides */
1176     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1177 
1178     /* Solve L y = b, overwriting b with y */
1179     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1180 
1181     /* Solve U x = y (=b), overwriting b with x */
1182     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1183   } else { /* MAT_FACTOR_CHOLESKY */
1184     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1185     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1186                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1187   }
1188 
1189   /* Copy PLAPACK x into Petsc vector x  */
1190   PLA_Obj_local_length(v_pla, &loc_m);
1191   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1192   PLA_Obj_local_stride(v_pla, &loc_stride);
1193   /*
1194     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1195   */
1196   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1197   if (!lu->pla_solved){
1198 
1199     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1200     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1201 
1202     /* Create IS and cts for VecScatterring */
1203     PLA_Obj_local_length(v_pla, &loc_m);
1204     PLA_Obj_local_stride(v_pla, &loc_stride);
1205     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1206     idx_petsc = idx_pla + loc_m;
1207 
1208     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1209     for (i=0; i<loc_m; i+=lu->nb){
1210       j = 0;
1211       while (j < lu->nb && i+j < loc_m){
1212         idx_petsc[i+j] = rstart + j; j++;
1213       }
1214       rstart += size*lu->nb;
1215     }
1216 
1217     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1218 
1219     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1220     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1221     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1222     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1223   }
1224   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1226 
1227   /* Free data */
1228   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1229   PLA_Obj_free(&v_pla);
1230 
1231   lu->pla_solved = PETSC_TRUE;
1232   PetscFunctionReturn(0);
1233 }
1234 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
/*
   MatLUFactorNumeric_MPIDense - Numeric LU factorization via PLAPACK.
   Copies A into a freshly created PLAPACK matrix held in F->spptr and calls
   PLA_LU, which overwrites it with the L and U factors and fills the pivot
   vector.
*/
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* refactorization: release objects left over from the previous factor */
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free (&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;
  F->ops->solve      = MatSolve_MPIDense;
  F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}
1277 
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
/*
   MatCholeskyFactorNumeric_MPIDense - Numeric Cholesky factorization via
   PLAPACK.  Copies A into a freshly created PLAPACK matrix held in F->spptr
   and calls PLA_Chol on the lower triangle.
*/
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* refactorization: release the matrix object from the previous factor */
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object (Cholesky uses no pivot vector) */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0)
    SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;
  F->ops->solve      = MatSolve_MPIDense;
  F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}
1318 
1319 /* Note the Petsc perm permutation is ignored */
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1322 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1323 {
1324   PetscErrorCode ierr;
1325   PetscTruth     issymmetric,set;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1329   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1330   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 /* Note the Petsc r and c permutations are ignored */
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1337 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1338 {
1339   PetscErrorCode ierr;
1340   PetscInt       M = A->rmap->N;
1341   Mat_Plapack    *lu;
1342 
1343   PetscFunctionBegin;
1344   lu = (Mat_Plapack*)F->spptr;
1345   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1346   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
/* Returns the MatSolverPackage identifier for this PLAPACK-based factorization. */
PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MAT_SOLVER_PLAPACK;
  PetscFunctionReturn(0);
}
1359 EXTERN_C_END
1360 
1361 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_plapack"
/*
   MatGetFactor_mpidense_plapack - Creates the factor matrix F that will
   hold a PLAPACK LU or Cholesky factorization of an MPI dense matrix.

   Sets the default PLAPACK block size (overridable with -mat_plapack_nb),
   creates the distribution template and matrix object, and installs the
   appropriate symbolic factorization routine for the requested ftype.
*/
PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters: block size ~ M/size, rounded up */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() not called yet */
  lu->mstruct        = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factor = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1415 EXTERN_C_END
1416 #endif
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatAXPY_MPIDense"
1420 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1421 {
1422   PetscErrorCode ierr;
1423   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1424 
1425   PetscFunctionBegin;
1426   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 /* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE.  The /*NN*/ comments give the slot index
   in struct _MatOps; a 0 entry means the operation is not implemented. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*24*/ MatZeroRows_MPIDense,
       0,
       0,
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*34*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIDense,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*44*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       0,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
/*83*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*89*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*94*/ 0,
       0,
       0,
       0};
1535 
1536 EXTERN_C_BEGIN
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1539 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1540 {
1541   Mat_MPIDense   *a;
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   mat->preallocated = PETSC_TRUE;
1546   /* Note:  For now, when data is specified above, this assumes the user correctly
1547    allocates the local dense storage space.  We should add error checking. */
1548 
1549   a    = (Mat_MPIDense*)mat->data;
1550   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1551   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1552   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1553   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1554   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 EXTERN_C_END
1558 
1559 /*MC
   MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1561 
1562   run config/configure.py with the option --download-plapack
1563 
1564 
1565   Options Database Keys:
1566 . -mat_plapack_nprows <n> - number of rows in processor partition
1567 . -mat_plapack_npcols <n> - number of columns in processor partition
1568 . -mat_plapack_nb <n> - block size of template vector
1569 . -mat_plapack_nb_alg <n> - algorithmic block size
1570 - -mat_plapack_ckerror <n> - error checking flag
1571 
1572 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1573 
1574 M*/
1575 
1576 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Constructor for the MATMPIDENSE type: allocates the
   Mat_MPIDense data, installs the operation table, sets up the row/column
   maps and the off-process value stash, and composes the named operations
   that other parts of the library look up dynamically.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->mapping      = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  /* dense matrices always use block size 1 */
  ierr = PetscMapSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;  /* number of local columns */

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* composed operations, retrieved elsewhere by their string names */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                      MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                      MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                      MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
                                     "MatGetFactor_mpidense_plapack",
                                      MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1634 EXTERN_C_END
1635 
1636 /*MC
1637    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1638 
1639    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1640    and MATMPIDENSE otherwise.
1641 
1642    Options Database Keys:
1643 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1644 
1645   Level: beginner
1646 
1647 
.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1649 M*/
1650 
1651 EXTERN_C_BEGIN
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatCreate_Dense"
1654 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1655 {
1656   PetscErrorCode ierr;
1657   PetscMPIInt    size;
1658 
1659   PetscFunctionBegin;
1660   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1661   if (size == 1) {
1662     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1663   } else {
1664     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1665   }
1666   PetscFunctionReturn(0);
1667 }
1668 EXTERN_C_END
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1672 /*@C
1673    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1674 
1675    Not collective
1676 
1677    Input Parameters:
1678 .  A - the matrix
1679 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1680    to control all matrix memory allocation.
1681 
1682    Notes:
1683    The dense format is fully compatible with standard Fortran 77
1684    storage by columns.
1685 
1686    The data input variable is intended primarily for Fortran programmers
1687    who wish to allocate their own matrix memory space.  Most users should
1688    set data=PETSC_NULL.
1689 
1690    Level: intermediate
1691 
1692 .keywords: matrix,dense, parallel
1693 
1694 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1695 @*/
1696 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1697 {
1698   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1699 
1700   PetscFunctionBegin;
1701   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1702   if (f) {
1703     ierr = (*f)(mat,data);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatCreateMPIDense"
1710 /*@C
1711    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1712 
1713    Collective on MPI_Comm
1714 
1715    Input Parameters:
1716 +  comm - MPI communicator
1717 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1718 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1719 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1720 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1721 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1722    to control all matrix memory allocation.
1723 
1724    Output Parameter:
1725 .  A - the matrix
1726 
1727    Notes:
1728    The dense format is fully compatible with standard Fortran 77
1729    storage by columns.
1730 
1731    The data input variable is intended primarily for Fortran programmers
1732    who wish to allocate their own matrix memory space.  Most users should
1733    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1734 
1735    The user MUST specify either the local or global matrix dimensions
1736    (possibly both).
1737 
1738    Level: intermediate
1739 
1740 .keywords: matrix,dense, parallel
1741 
1742 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1743 @*/
1744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1745 {
1746   PetscErrorCode ierr;
1747   PetscMPIInt    size;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1751   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1752   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1753   if (size > 1) {
1754     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1755     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1756   } else {
1757     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1758     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIDense"
/*
   MatDuplicate_MPIDense - Creates a new MPI dense matrix with the same layout
   as A, copying the values when cpvalues requests it.  The parallel bookkeeping
   (row/column ranges, stash, scatter context) is rebuilt here and the local
   dense block is duplicated via MatDuplicate() on the inner sequential matrix.
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  /* clone the entire function table so any overridden operations carry over */
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = A->factor;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  /* copy the parallel layout and per-process state from the original */
  mat->rmap->rstart     = A->rmap->rstart;
  mat->rmap->rend       = A->rmap->rend;
  a->size              = oldmat->size;
  a->rank              = oldmat->rank;
  mat->insertmode      = NOT_SET_VALUES;
  a->nvec              = oldmat->nvec;
  a->donotstash        = oldmat->donotstash;

  /* ownership-range arrays have size+1 entries (one per process plus sentinel) */
  ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* rebuild the scatter context for matrix-vector products, then duplicate the
     local dense block (values copied or not, per cpvalues) */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
1801 
1802 #include "petscsys.h"
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1806 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1807 {
1808   PetscErrorCode ierr;
1809   PetscMPIInt    rank,size;
1810   PetscInt       *rowners,i,m,nz,j;
1811   PetscScalar    *array,*vals,*vals_ptr;
1812   MPI_Status     status;
1813 
1814   PetscFunctionBegin;
1815   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1816   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1817 
1818   /* determine ownership of all rows */
1819   m          = M/size + ((M % size) > rank);
1820   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1821   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1822   rowners[0] = 0;
1823   for (i=2; i<=size; i++) {
1824     rowners[i] += rowners[i-1];
1825   }
1826 
1827   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1828   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1829   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1830   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1831   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1832 
1833   if (!rank) {
1834     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1835 
1836     /* read in my part of the matrix numerical values  */
1837     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1838 
1839     /* insert into matrix-by row (this is why cannot directly read into array */
1840     vals_ptr = vals;
1841     for (i=0; i<m; i++) {
1842       for (j=0; j<N; j++) {
1843         array[i + j*m] = *vals_ptr++;
1844       }
1845     }
1846 
1847     /* read in other processors and ship out */
1848     for (i=1; i<size; i++) {
1849       nz   = (rowners[i+1] - rowners[i])*N;
1850       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1851       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1852     }
1853   } else {
1854     /* receive numeric values */
1855     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1856 
1857     /* receive message of values*/
1858     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1859 
1860     /* insert into matrix-by row (this is why cannot directly read into array */
1861     vals_ptr = vals;
1862     for (i=0; i<m; i++) {
1863       for (j=0; j<N; j++) {
1864         array[i + j*m] = *vals_ptr++;
1865       }
1866     }
1867   }
1868   ierr = PetscFree(rowners);CHKERRQ(ierr);
1869   ierr = PetscFree(vals);CHKERRQ(ierr);
1870   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1871   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIDense"
/*
   MatLoad_MPIDense - Loads a parallel dense matrix from a PETSc binary viewer.

   The file may store the matrix either in dense format (header nz field equal
   to MATRIX_BINARY_FORMAT_DENSE, handled by MatLoad_MPIDense_DenseInFile())
   or in the standard sparse AIJ binary layout, which is read here and
   inserted value-by-value with MatSetValues().

   Process 0 performs all file reads and ships each process its row lengths,
   column indices, and values; other processes only receive.
*/
PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       i,nz,j,rstart,rend;
  int            fd;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; header is broadcast below */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2]; nz = header[3];

  /*
       Handle case where matrix is stored on disk as a dense matrix
  */
  if (nz == MATRIX_BINARY_FORMAT_DENSE) {
    ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* determine ownership of all rows: first M%size processes get one extra row */
  m          = PetscMPIIntCast(M/size + ((M % size) > rank));
  ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix-sum the counts into ownership offsets */
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array since counts are contiguous */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* NOTE(review): ourlens is adjusted down then restored after creation;
     presumably kept for parity with the AIJ loader — confirm before removing */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
  A = *newmat;
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatEqual_MPIDense"
2059 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2060 {
2061   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2062   Mat            a,b;
2063   PetscTruth     flg;
2064   PetscErrorCode ierr;
2065 
2066   PetscFunctionBegin;
2067   a = matA->A;
2068   b = matB->A;
2069   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2070   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #if defined(PETSC_HAVE_PLAPACK)
2075 
#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* shuts down PLAPACK's global state; registered with PetscRegisterFinalize()
     by PetscPLAPACKInitializePackage() */
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2093 
#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
  called from MatCreate_MPIDense() the first time an MPI dense matrix is called.

  Input Parameter:
.   comm - the communicator the matrix lives on

  Level: developer

   Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
  same communicator (because there is some global state that is initialized and used for all matrices). In addition if
  PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators
  cannot overlap.

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* PLAPACK keeps global state, so initialize at most once per run */
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    /* default 2D processor mesh: 1 row by size columns; overridable below */
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
      /* error checking defaults on (level 3) in debug builds, off otherwise */
#if defined(PETSC_USE_DEBUG)
      Plapack_ierror = 3;
#else
      Plapack_ierror = 0;
#endif
      ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_ierror){
	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
      } else {
	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
      }

      /* 0 means let PLAPACK choose its own algorithmic block size */
      Plapack_nb_alg = 0;
      ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_nb_alg) {
        ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
      }
    PetscOptionsEnd();

    /* map the 1D PETSc communicator onto PLAPACK's 2D mesh and start PLAPACK;
       tear it down again automatically at PetscFinalize() */
    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2153 
2154 #endif
2155