1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix returns the sequential
19               matrix that represents the local portion of the operator; for a MATSEQDENSE matrix it returns the matrix itself.
20 
21     Input Parameter:
22 .      A - the sequential (MATSEQDENSE) or parallel (MATMPIDENSE) dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
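/*
   An illustrative usage sketch (not part of this source file): obtain the
   process-local block of a parallel dense matrix and inspect its raw
   column-major storage.  The returned matrix aliases the data of A, so the
   caller must not destroy it.

     Mat         Aloc;
     PetscScalar *v;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
     .... entry (i,j) of the local block is v[i + j*m], m = local rows ....
     ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
*/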
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse == MAT_REUSE_MATRIX) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
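/*
   The stash path above is what makes the usual PETSc idiom below legal on
   every process (an illustrative sketch): a value whose row is owned by
   another process is cached in mat->stash and only migrates to its owner
   during the MatAssemblyBegin()/MatAssemblyEnd() pair.

     PetscInt    row = 0,col = 0;        row may be owned by any process
     PetscScalar v   = 1.0;

     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/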
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap->N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190   const PetscInt *irow,*icol;
191   PetscScalar    *av,*bv,*v = lmat->v;
192   Mat            newmat;
193 
194   PetscFunctionBegin;
195   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
196   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
198   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
199 
200   /* No parallel redistribution currently supported! Should really check each index set
201      to confirm that it is OK.  Currently this supports only submatrices with the same row
202      partitioning as the original matrix! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<ncols; i++) {
225     av = v + nlrows*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
238   *B = newmat;
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreArray_MPIDense"
244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245 {
246   PetscFunctionBegin;
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253 {
254   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
255   MPI_Comm       comm = ((PetscObject)mat)->comm;
256   PetscErrorCode ierr;
257   PetscInt       nstash,reallocs;
258   InsertMode     addv;
259 
260   PetscFunctionBegin;
261   /* make sure all processes use the same insert mode, either INSERT_VALUES or ADD_VALUES */
262   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
263   if (addv == (ADD_VALUES|INSERT_VALUES)) {
264     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265   }
266   mat->insertmode = addv; /* in case this processor had no cache */
267 
268   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
269   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
270   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
279   PetscErrorCode  ierr;
280   PetscInt        i,*row,*col,flg,j,rstart,ncols;
281   PetscMPIInt     n;
282   PetscScalar     *val;
283   InsertMode      addv=mat->insertmode;
284 
285   PetscFunctionBegin;
286   /*  wait on receives */
287   while (1) {
288     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
289     if (!flg) break;
290 
291     for (i=0; i<n;) {
292       /* Now identify the consecutive vals belonging to the same row */
293       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294       if (j < n) ncols = j-i;
295       else       ncols = n-i;
296       /* Now assemble all these values with a single function call */
297       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
298       i = j;
299     }
300   }
301   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
302 
303   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
304   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
305 
306   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatZeroEntries_MPIDense"
314 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315 {
316   PetscErrorCode ierr;
317   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
318 
319   PetscFunctionBegin;
320   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 /* the code does not do the diagonal entries correctly unless the
325    matrix is square and the column and row ownerships are identical.
326    This is a BUG. The only way to fix it seems to be to access
327    the local matrix mdn->A directly and not go through the
328    MatZeroRows() routine.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatZeroRows_MPIDense"
332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333 {
334   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
335   PetscErrorCode ierr;
336   PetscInt       i,*owners = A->rmap->range;
337   PetscInt       *nprocs,j,idx,nsends;
338   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
339   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340   PetscInt       *lens,*lrows,*values;
341   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
342   MPI_Comm       comm = ((PetscObject)A)->comm;
343   MPI_Request    *send_waits,*recv_waits;
344   MPI_Status     recv_status,*send_status;
345   PetscTruth     found;
346 
347   PetscFunctionBegin;
348   /*  first count number of contributors to each processor */
349   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
350   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
352   for (i=0; i<N; i++) {
353     idx = rows[i];
354     found = PETSC_FALSE;
355     for (j=0; j<size; j++) {
356       if (idx >= owners[j] && idx < owners[j+1]) {
357         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358       }
359     }
360     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361   }
362   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
363 
364   /* inform other processors of number of messages and max length*/
365   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
366 
367   /* post receives:   */
368   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
369   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
370   for (i=0; i<nrecvs; i++) {
371     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
372   }
373 
374   /* do sends:
375       1) starts[i] gives the starting index in svalues for stuff going to
376          the ith processor
377   */
378   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
379   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
380   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
381   starts[0]  = 0;
382   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383   for (i=0; i<N; i++) {
384     svalues[starts[owner[i]]++] = rows[i];
385   }
386 
387   starts[0] = 0;
388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389   count = 0;
390   for (i=0; i<size; i++) {
391     if (nprocs[2*i+1]) {
392       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
393     }
394   }
395   ierr = PetscFree(starts);CHKERRQ(ierr);
396 
397   base = owners[rank];
398 
399   /*  wait on receives */
400   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
401   source = lens + nrecvs;
402   count  = nrecvs; slen = 0;
403   while (count) {
404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
405     /* unpack receives into our local space */
406     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
407     source[imdex]  = recv_status.MPI_SOURCE;
408     lens[imdex]    = n;
409     slen += n;
410     count--;
411   }
412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
413 
414   /* move the data into the send scatter */
415   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
416   count = 0;
417   for (i=0; i<nrecvs; i++) {
418     values = rvalues + i*nmax;
419     for (j=0; j<lens[i]; j++) {
420       lrows[count++] = values[j] - base;
421     }
422   }
423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
424   ierr = PetscFree(lens);CHKERRQ(ierr);
425   ierr = PetscFree(owner);CHKERRQ(ierr);
426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
427 
428   /* actually zap the local rows */
429   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
430   ierr = PetscFree(lrows);CHKERRQ(ierr);
431 
432   /* wait on sends */
433   if (nsends) {
434     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
435     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
436     ierr = PetscFree(send_status);CHKERRQ(ierr);
437   }
438   ierr = PetscFree(send_waits);CHKERRQ(ierr);
439   ierr = PetscFree(svalues);CHKERRQ(ierr);
440 
441   PetscFunctionReturn(0);
442 }
443 
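/*
   Summary of the exchange above: each process bins the global rows it was
   asked to zero by owning process (found from A->rmap->range), agrees on
   message counts and maximum length via PetscMaxSum(), ships each bin with
   MPI_Isend()/MPI_Irecv(), converts every index it receives to local
   numbering by subtracting base = owners[rank], and finally zeros its own
   rows by calling MatZeroRows() on the local sequential block.
*/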
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMult_MPIDense"
446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447 {
448   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
449   PetscErrorCode ierr;
450 
451   PetscFunctionBegin;
452   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
454   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMultAdd_MPIDense"
460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
468   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTranspose_MPIDense"
474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475 {
476   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
477   PetscErrorCode ierr;
478   PetscScalar    zero = 0.0;
479 
480   PetscFunctionBegin;
481   ierr = VecSet(yy,zero);CHKERRQ(ierr);
482   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
483   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
497   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
498   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
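/*
   The four products above share one communication pattern.  Partition the
   rows so that process p stores A_p = A(rstart:rend-1,0:N-1) in mdn->A;
   then

     y_p = A_p x        -- gather all of x into the sequential work vector
                           mdn->lvec (forward scatter), then one local
                           dense multiply;

     y = A' x = sum_p A_p' x_p
                        -- one local transpose multiply into mdn->lvec,
                           then a reverse scatter with ADD_VALUES that sums
                           the partial results into the distributed y.
*/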
503 #undef __FUNCT__
504 #define __FUNCT__ "MatGetDiagonal_MPIDense"
505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506 {
507   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
508   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
509   PetscErrorCode ierr;
510   PetscInt       len,i,n,m = A->rmap->n,radd;
511   PetscScalar    *x,zero = 0.0;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(v,zero);CHKERRQ(ierr);
515   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
516   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
517   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
519   radd = A->rmap->rstart*m;
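  /* The local block is stored column-major with leading dimension m, so the
     diagonal entry for global row rstart+i lies in global column rstart+i,
     i.e. at local offset (rstart+i)*m + i = radd + i*m + i. */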
520   for (i=0; i<len; i++) {
521     x[i] = aloc->v[radd + i*m + i];
522   }
523   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatDestroy_MPIDense"
529 PetscErrorCode MatDestroy_MPIDense(Mat mat)
530 {
531   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
532   PetscErrorCode ierr;
533 #if defined(PETSC_HAVE_PLAPACK)
534   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
535 #endif
536 
537   PetscFunctionBegin;
538 
539 #if defined(PETSC_USE_LOG)
540   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541 #endif
542   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
543   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
544   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
545   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
546 #if defined(PETSC_HAVE_PLAPACK)
547   if (lu) {
548     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
549     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
550     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
551 
552     if (lu->is_pla) {
553       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
554       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
555       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
556     }
557   }
558 #endif
559 
560   ierr = PetscFree(mdn);CHKERRQ(ierr);
561   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_MPIDense_Binary"
572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573 {
574   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
575   PetscErrorCode    ierr;
576   PetscViewerFormat format;
577   int               fd;
578   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
579   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
580   PetscScalar       *work,*v,*vv;
581   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
582   MPI_Status        status;
583 
584   PetscFunctionBegin;
585   if (mdn->size == 1) {
586     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
587   } else {
588     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
589     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
590     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
591 
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_BINARY_NATIVE) {
594 
595       if (!rank) {
596         /* store the matrix as a dense matrix */
597         header[0] = MAT_FILE_COOKIE;
598         header[1] = mat->rmap->N;
599         header[2] = N;
600         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
602 
603         /* get largest work array needed for transposing array */
604         mmax = mat->rmap->n;
605         for (i=1; i<size; i++) {
606           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607         }
608         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
609 
610         /* write out local array, by rows */
611         m    = mat->rmap->n;
612         v    = a->v;
613         for (j=0; j<N; j++) {
614           for (i=0; i<m; i++) {
615             work[j + i*N] = *v++;
616           }
617         }
618         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
619         /* get largest work array to receive messages from other processes, excludes process zero */
620         mmax = 0;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
625         for(k = 1; k < size; k++) {
626           v    = vv;
627           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
629 
630           for(j = 0; j < N; j++) {
631             for(i = 0; i < m; i++) {
632               work[j + i*N] = *v++;
633             }
634           }
635           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         }
637         ierr = PetscFree(work);CHKERRQ(ierr);
638         ierr = PetscFree(vv);CHKERRQ(ierr);
639       } else {
640         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
641       }
642     } else {
643       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE)");
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
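/*
   Note on the copy loops above: the local dense block is stored
   column-major (entry (i,j) at v[i + j*m]) while the PETSc binary dense
   format is written row by row, so each block is transposed into
   work[j + i*N] before being handed to PetscBinaryWrite().
*/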
649 #undef __FUNCT__
650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652 {
653   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
654   PetscErrorCode        ierr;
655   PetscMPIInt           size = mdn->size,rank = mdn->rank;
656   const PetscViewerType vtype;
657   PetscTruth            iascii,isdraw;
658   PetscViewer           sviewer;
659   PetscViewerFormat     format;
660 #if defined(PETSC_HAVE_PLAPACK)
661   Mat_Plapack           *lu=(Mat_Plapack*)(mat->spptr);
662 #endif
663 
664   PetscFunctionBegin;
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
677 #if defined(PETSC_HAVE_PLAPACK)
678       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",lu->nprows,lu->npcols);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
681       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
682       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
683 #endif
684       PetscFunctionReturn(0);
685     } else if (format == PETSC_VIEWER_ASCII_INFO) {
686       PetscFunctionReturn(0);
687     }
688   } else if (isdraw) {
689     PetscDraw  draw;
690     PetscTruth isnull;
691 
692     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
693     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
694     if (isnull) PetscFunctionReturn(0);
695   }
696 
697   if (size == 1) {
698     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
699   } else {
700     /* assemble the entire matrix onto first processor. */
701     Mat         A;
702     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
703     PetscInt    *cols;
704     PetscScalar *vals;
705 
706     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
707     if (!rank) {
708       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
709     } else {
710       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
711     }
712     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
713     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
714     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
715     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
716 
717     /* Copy the matrix ... This isn't the most efficient means,
718        but it's quick for now */
719     A->insertmode = INSERT_VALUES;
720     row = mat->rmap->rstart; m = mdn->A->rmap->n;
721     for (i=0; i<m; i++) {
722       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
723       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
724       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
725       row++;
726     }
727 
728     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
729     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
730     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
733     }
734     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
736     ierr = MatDestroy(A);CHKERRQ(ierr);
737   }
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIDense"
743 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
744 {
745   PetscErrorCode ierr;
746   PetscTruth     iascii,isbinary,isdraw,issocket;
747 
748   PetscFunctionBegin;
749 
750   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
753   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
754 
755   if (iascii || issocket || isdraw) {
756     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
757   } else if (isbinary) {
758     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
759   } else {
760     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
761   }
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "MatGetInfo_MPIDense"
767 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
768 {
769   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
770   Mat            mdn = mat->A;
771   PetscErrorCode ierr;
772   PetscReal      isend[5],irecv[5];
773 
774   PetscFunctionBegin;
775   info->rows_global    = (double)A->rmap->N;
776   info->columns_global = (double)A->cmap->N;
777   info->rows_local     = (double)A->rmap->n;
778   info->columns_local  = (double)A->cmap->N;
779   info->block_size     = 1.0;
780   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
781   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;  isend[4] = info->mallocs;
783   if (flag == MAT_LOCAL) {
784     info->nz_used      = isend[0];
785     info->nz_allocated = isend[1];
786     info->nz_unneeded  = isend[2];
787     info->memory       = isend[3];
788     info->mallocs      = isend[4];
789   } else if (flag == MAT_GLOBAL_MAX) {
790     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
791     info->nz_used      = irecv[0];
792     info->nz_allocated = irecv[1];
793     info->nz_unneeded  = irecv[2];
794     info->memory       = irecv[3];
795     info->mallocs      = irecv[4];
796   } else if (flag == MAT_GLOBAL_SUM) {
797     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
798     info->nz_used      = irecv[0];
799     info->nz_allocated = irecv[1];
800     info->nz_unneeded  = irecv[2];
801     info->memory       = irecv[3];
802     info->mallocs      = irecv[4];
803   }
804   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
805   info->fill_ratio_needed = 0;
806   info->factor_mallocs    = 0;
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetOption_MPIDense"
812 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
813 {
814   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_ROW_ORIENTED:
825     a->roworiented = flg;
826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
827     break;
828   case MAT_NEW_DIAGONALS:
829   case MAT_USE_HASH_TABLE:
830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
831     break;
832   case MAT_IGNORE_OFF_PROC_ENTRIES:
833     a->donotstash = flg;
834     break;
835   case MAT_SYMMETRIC:
836   case MAT_STRUCTURALLY_SYMMETRIC:
837   case MAT_HERMITIAN:
838   case MAT_SYMMETRY_ETERNAL:
839   case MAT_IGNORE_LOWER_TRIANGULAR:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) { (*v++) *= x;}
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
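/*
   MatDiagonalScale() computes A <- diag(ll) A diag(rr) on the local
   column-major block: left scaling multiplies local row i by ll[i],
   touching one entry per column at stride m; right scaling first gathers
   the column-distributed rr into the sequential work vector mdn->lvec and
   then multiplies each contiguous local column j by r[j].
*/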
890 #undef __FUNCT__
891 #define __FUNCT__ "MatNorm_MPIDense"
892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893 {
894   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
896   PetscErrorCode ierr;
897   PetscInt       i,j;
898   PetscReal      sum = 0.0;
899   PetscScalar    *v = mat->v;
900 
901   PetscFunctionBegin;
902   if (mdn->size == 1) {
903     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
904   } else {
905     if (type == NORM_FROBENIUS) {
906       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
907 #if defined(PETSC_USE_COMPLEX)
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909 #else
910         sum += (*v)*(*v); v++;
911 #endif
912       }
913       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
914       *nrm = sqrt(*nrm);
915       ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
916     } else if (type == NORM_1) {
917       PetscReal *tmp,*tmp2;
918       ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
919       tmp2 = tmp + A->cmap->N;
920       ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
921       *nrm = 0.0;
922       v = mat->v;
923       for (j=0; j<mdn->A->cmap->n; j++) {
924         for (i=0; i<mdn->A->rmap->n; i++) {
925           tmp[j] += PetscAbsScalar(*v);  v++;
926         }
927       }
928       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       for (j=0; j<A->cmap->N; j++) {
930         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931       }
932       ierr = PetscFree(tmp);CHKERRQ(ierr);
933       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
934     } else if (type == NORM_INFINITY) { /* max row norm */
935       PetscReal ntemp;
936       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
937       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
938     } else {
939       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
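/*
   Reductions used above, with each process holding an m x N block A_p:

     NORM_FROBENIUS:  sqrt of the MPI_SUM of the local sums of |a_ij|^2
     NORM_1:          max column sum; local column sums are combined with
                      MPI_SUM, then the maximum is taken over all columns
     NORM_INFINITY:   max row sum; since every process owns full rows, the
                      local infinity norms are simply combined with MPI_MAX
*/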
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_MPIDense"
947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948 {
949   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951   Mat            B;
952   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
953   PetscErrorCode ierr;
954   PetscInt       j,i;
955   PetscScalar    *v;
956 
957   PetscFunctionBegin;
958   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
959   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
960     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
961     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
963     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
964   } else {
965     B = *matout;
966   }
967 
968   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
969   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
970   for (i=0; i<m; i++) rwork[i] = rstart + i;
971   for (j=0; j<n; j++) {
972     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
973     v   += m;
974   }
975   ierr = PetscFree(rwork);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
979     *matout = B;
980   } else {
981     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
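/*
   An illustrative call sequence (not part of this source file): because
   the loop above inserts with global indices through MatSetValues(), the
   result automatically acquires the transposed parallel layout.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ....
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/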
986 #include "petscblaslapack.h"
987 #undef __FUNCT__
988 #define __FUNCT__ "MatScale_MPIDense"
989 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990 {
991   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
992   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
993   PetscScalar    oalpha = alpha;
994   PetscErrorCode ierr;
995   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
996 
997   PetscFunctionBegin;
998   BLASscal_(&nz,&oalpha,a->v,&one);
999   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1007 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008 {
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #if defined(PETSC_HAVE_PLAPACK)
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1020 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1021 {
1022   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1023   PetscErrorCode ierr;
1024   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1025   PetscScalar    *array;
1026   PetscReal      one = 1.0;
1027 
1028   PetscFunctionBegin;
1029   /* Copy A into F->lu->A */
1030   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1031   ierr = PLA_API_begin();CHKERRQ(ierr);
1032   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1034   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1035   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1036   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1037   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1038   ierr = PLA_API_end();CHKERRQ(ierr);
1039   lu->rstart = rstart;
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1045 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1046 {
1047   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1048   PetscErrorCode ierr;
1049   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1050   PetscScalar    *array;
1051   PetscReal      one = 1.0;
1052 
1053   PetscFunctionBegin;
1054   /* Copy F into A->lu->A */
1055   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1056   ierr = PLA_API_begin();CHKERRQ(ierr);
1057   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1058   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1059   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1060   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1061   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1062   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1063   ierr = PLA_API_end();CHKERRQ(ierr);
1064   lu->rstart = rstart;
1065   PetscFunctionReturn(0);
1066 }
1067 
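/*
   Both copy helpers above follow the same PLAPACK API pattern: bracket the
   transfer with PLA_API_begin()/PLA_Obj_API_open() and the matching close
   calls, then move the m x M local strip in one call to
   PLA_API_axpy_matrix_to_global() (or ..._global_to_matrix()) at row
   offset rstart; the coefficient is one, so the "axpy" amounts to a plain
   copy onto a zeroed target.
*/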
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1070 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1071 {
1072   PetscErrorCode ierr;
1073   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1074   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1075   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1076   PLA_Obj        alpha = NULL,beta = NULL;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1080   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1081 
1082   /*
1083   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1084   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1085   */
1086 
1087   /* do the multiply in PLA  */
1088   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1089   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1090   CHKMEMQ;
1091 
1092   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1093   CHKMEMQ;
1094   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1095   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1096 
1097   /*
1098   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1099   */
1100   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1106 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1107 {
1108   PetscErrorCode ierr;
1109   PetscInt       m=A->rmap->n,n=B->cmap->n;
1110   Mat            Cmat;
1111 
1112   PetscFunctionBegin;
1113   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
1114   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1115   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1116   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1117   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1118   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1120   /* ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);    */
1121   /* ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);    */
1122   /* ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr); */
1123   *C = Cmat;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1129 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1130 {
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   if (scall == MAT_INITIAL_MATRIX){
1135     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1136   }
1137   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
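/*
   An illustrative usage sketch (assuming the PLAPACK path above were
   enabled; the symbolic stage currently errors out):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ....
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   MAT_INITIAL_MATRIX runs the symbolic stage to create C; MAT_REUSE_MATRIX
   skips straight to the numeric stage with the existing C.
*/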
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatSolve_MPIDense"
1143 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1144 {
1145   MPI_Comm       comm = ((PetscObject)A)->comm;
1146   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1147   PetscErrorCode ierr;
1148   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1149   PetscScalar    *array;
1150   PetscReal      one = 1.0;
1151   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1152   PLA_Obj        v_pla = NULL;
1153   PetscScalar    *loc_buf;
1154   Vec            loc_x;
1155 
1156   PetscFunctionBegin;
1157   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1158   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1159 
1160   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1161   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1162   PLA_Obj_set_to_zero(v_pla);
1163 
1164   /* Copy b into rhs_pla */
1165   PLA_API_begin();
1166   PLA_Obj_API_open(v_pla);
1167   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1168   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1169   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1170   PLA_Obj_API_close(v_pla);
1171   PLA_API_end();
1172 
1173   if (A->factor == MAT_FACTOR_LU){
1174     /* Apply the permutations to the right hand sides */
1175     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1176 
1177     /* Solve L y = b, overwriting b with y */
1178     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1179 
1180     /* Solve U x = y (=b), overwriting b with x */
1181     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1182   } else { /*MAT_FACTOR_CHOLESKY */
1183     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1184     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1185                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1186   }
1187 
1188   /* Copy PLAPACK x into Petsc vector x  */
1189   PLA_Obj_local_length(v_pla, &loc_m);
1190   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1191   PLA_Obj_local_stride(v_pla, &loc_stride);
1192   /*
1193     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1194   */
1195   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1196   if (!lu->pla_solved){
1197 
1198     PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);
1199     PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);
1200     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1201 
1202     /* Create index sets and a scatter context for the VecScatter */
1203     PLA_Obj_local_length(v_pla, &loc_m);
1204     PLA_Obj_local_stride(v_pla, &loc_stride);
1205     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1206     idx_petsc = idx_pla + loc_m;
1207 
1208     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1209     for (i=0; i<loc_m; i+=lu->nb){
1210       j = 0;
1211       while (j < lu->nb && i+j < loc_m){
1212         idx_petsc[i+j] = rstart + j; j++;
1213       }
1214       rstart += size*lu->nb;
1215     }
1216 
1217     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1218 
1219     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1220     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1221     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1222     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1223   }
1224   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1226 
1227   /* Free data */
1228   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1229   PLA_Obj_free(&v_pla);
1230 
1231   lu->pla_solved = PETSC_TRUE;
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1237 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,MatFactorInfo *info)
1238 {
1239   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1240   PetscErrorCode ierr;
1241   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1242   PetscInt       info_pla=0;
1243   PetscScalar    *array,one = 1.0;
1244 
1245   PetscFunctionBegin;
1246   if (lu->mstruct == SAME_NONZERO_PATTERN){
1247     PLA_Obj_free(&lu->A);
1248     PLA_Obj_free (&lu->pivots);
1249   }
1250   /* Create PLAPACK matrix object */
1251   lu->A = NULL; lu->pivots = NULL;
1252   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1253   PLA_Obj_set_to_zero(lu->A);
1254   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1255 
1256   /* Copy A into lu->A */
1257   PLA_API_begin();
1258   PLA_Obj_API_open(lu->A);
1259   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1260   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1261   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1262   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1263   PLA_Obj_API_close(lu->A);
1264   PLA_API_end();
1265 
1266   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1267   info_pla = PLA_LU(lu->A,lu->pivots);
1268   if (info_pla != 0)
1269     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1270 
1271   lu->CleanUpPlapack = PETSC_TRUE;
1272   lu->rstart         = rstart;
1273   lu->mstruct        = SAME_NONZERO_PATTERN;
1274   F->ops->solve      = MatSolve_MPIDense;
1275   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1281 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,MatFactorInfo *info)
1282 {
1283   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1284   PetscErrorCode ierr;
1285   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1286   PetscInt       info_pla=0;
1287   PetscScalar    *array,one = 1.0;
1288 
1289   PetscFunctionBegin;
1290   if (lu->mstruct == SAME_NONZERO_PATTERN){
1291     PLA_Obj_free(&lu->A);
1292   }
1293   /* Create PLAPACK matrix object */
1294   lu->A      = NULL;
1295   lu->pivots = NULL;
1296   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1297 
1298   /* Copy A into lu->A */
1299   PLA_API_begin();
1300   PLA_Obj_API_open(lu->A);
1301   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1302   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1303   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1304   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1305   PLA_Obj_API_close(lu->A);
1306   PLA_API_end();
1307 
1308   /* Factor P A -> Chol */
1309   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1310   if (info_pla != 0)
1311     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1312 
1313   lu->CleanUpPlapack = PETSC_TRUE;
1314   lu->rstart         = rstart;
1315   lu->mstruct        = SAME_NONZERO_PATTERN;
1316   F->ops->solve      = MatSolve_MPIDense;
1317   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatFactorSymbolic_Plapack_Private"
1323 PetscErrorCode MatFactorSymbolic_Plapack_Private(Mat F,Mat A,MatFactorInfo *info)
1324 {
1325   Mat            B = F;
1326   Mat_Plapack    *lu;
1327   PetscErrorCode ierr;
1328   PetscInt       M=A->rmap->N;
1329   MPI_Comm       comm=((PetscObject)A)->comm,comm_2d;
1330   PetscMPIInt    size;
1331   PetscInt       ierror;
1332 
1333   PetscFunctionBegin;
1334   lu = (Mat_Plapack*)(B->spptr);
1335 
1336   /* Set default Plapack parameters */
1337   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1338   lu->nprows = 1; lu->npcols = size;
1339   ierror = 0;
1340   lu->nb     = M/size;
1341   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1342 
1343   /* Set runtime options */
1344   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1345   ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr);
1346   ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr);
1347 
1348   ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1349   ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
1350   if (ierror){
1351     PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
1352   } else {
1353     PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
1354   }
1355   lu->ierror = ierror;
1356 
1357   lu->nb_alg = 0;
1358   ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
1359   if (lu->nb_alg){
1360     pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);
1361   }
1362   PetscOptionsEnd();
1363 
1364 
1365   /* Create a 2D communicator */
1366   PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);
1367   lu->comm_2d = comm_2d;
1368 
1369   /* Initialize PLAPACK */
1370   PLA_Init(comm_2d);
1371 
1372   /* Create object distribution template */
1373   lu->templ = NULL;
1374   PLA_Temp_create(lu->nb, 0, &lu->templ);
1375 
1376   /* Use suggested nb_alg if it is not provided by user */
1377   if (lu->nb_alg == 0){
1378     PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);
1379     pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);
1380   }
1381 
1382   /* Set the datatype */
1383 #if defined(PETSC_USE_COMPLEX)
1384   lu->datatype = MPI_DOUBLE_COMPLEX;
1385 #else
1386   lu->datatype = MPI_DOUBLE;
1387 #endif
1388 
1389   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1390   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1391   lu->CleanUpPlapack = PETSC_TRUE;
1392   PetscFunctionReturn(0);
1393 }
1394 
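/*
   Runtime options consumed above (an illustrative command line; the option
   names are exactly those registered in this routine):

     mpiexec -n 4 ./app -mat_plapack_nprows 2 -mat_plapack_npcols 2 \
                        -mat_plapack_nb 64 -mat_plapack_ckerror 1

   The 2D process mesh built by PLA_Comm_1D_to_2D() expects nprows*npcols
   to match the size of the communicator.
*/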
1395 /* Note the Petsc perm permutation is ignored */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1398 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,MatFactorInfo *info)
1399 {
1400   PetscErrorCode ierr;
1401   PetscTruth     issymmetric,set;
1402 
1403   PetscFunctionBegin;
1404   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1405   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1406   ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr);
1407   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 /* Note the Petsc r and c permutations are ignored */
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1414 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,MatFactorInfo *info)
1415 {
1416   PetscErrorCode ierr;
1417   PetscInt       M = A->rmap->N;
1418   Mat_Plapack    *lu;
1419 
1420   PetscFunctionBegin;
1421   ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr);
1422   lu = (Mat_Plapack*)F->spptr;
1423   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1424   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1430 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1431 {
1432   PetscErrorCode ierr;
1433   Mat_Plapack    *lu;
1434   PetscMPIInt    size;
1435   PetscInt       M = A->rmap->N;
1436 
1437   PetscFunctionBegin;
1438   /* Create the factorization matrix */
1439   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1440   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1441   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1442   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1443   (*F)->spptr = (void*)lu;
1444 
1446 
1447   /* Set default Plapack parameters */
1448   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1449   lu->nb     = M/size;
1450   if (M - lu->nb*size) lu->nb++; /* round up so one block per process covers all M rows (no cyclic distribution) */
1451 
1452   /* Set runtime options */
1453   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1454     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1455   PetscOptionsEnd();
1456 
1457   /* Create object distribution template */
1458   lu->templ = NULL;
1459   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1460 
1461   /* Set the datatype */
1462 #if defined(PETSC_USE_COMPLEX)
1463   lu->datatype = MPI_DOUBLE_COMPLEX;
1464 #else
1465   lu->datatype = MPI_DOUBLE;
1466 #endif
1467 
1468   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1469 
1470 
1471   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1472 
1473   if (ftype == MAT_FACTOR_LU) {
1474     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1475   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1476     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1477   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1478   (*F)->factor = ftype;
1479   PetscFunctionReturn(0);
1480 }
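/*
   Usage sketch (assumptions: the solver package string "plapack" follows the
   registration in MatCreate_MPIDense() below, error checking is elided, and all
   objects are assumed created elsewhere): this is the path by which a user
   reaches MatGetFactor_mpidense_plapack() and the PLAPACK LU routines.

       Mat           A,F;
       Vec           b,x;
       IS            r,c;                 - ignored by this implementation, see notes above
       MatFactorInfo info;

       MatGetFactor(A,"plapack",MAT_FACTOR_LU,&F);
       MatLUFactorSymbolic(F,A,r,c,&info);
       MatLUFactorNumeric(F,A,&info);
       MatSolve(F,b,x);
*/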
1481 #endif
1482 
1483 /* -------------------------------------------------------------------*/
1484 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1485        MatGetRow_MPIDense,
1486        MatRestoreRow_MPIDense,
1487        MatMult_MPIDense,
1488 /* 4*/ MatMultAdd_MPIDense,
1489        MatMultTranspose_MPIDense,
1490        MatMultTransposeAdd_MPIDense,
1491        0,
1492        0,
1493        0,
1494 /*10*/ 0,
1495        0,
1496        0,
1497        0,
1498        MatTranspose_MPIDense,
1499 /*15*/ MatGetInfo_MPIDense,
1500        MatEqual_MPIDense,
1501        MatGetDiagonal_MPIDense,
1502        MatDiagonalScale_MPIDense,
1503        MatNorm_MPIDense,
1504 /*20*/ MatAssemblyBegin_MPIDense,
1505        MatAssemblyEnd_MPIDense,
1506        0,
1507        MatSetOption_MPIDense,
1508        MatZeroEntries_MPIDense,
1509 /*25*/ MatZeroRows_MPIDense,
1510        0,
1511        0,
1512        0,
1513        0,
1514 /*30*/ MatSetUpPreallocation_MPIDense,
1515        0,
1516        0,
1517        MatGetArray_MPIDense,
1518        MatRestoreArray_MPIDense,
1519 /*35*/ MatDuplicate_MPIDense,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*40*/ 0,
1525        MatGetSubMatrices_MPIDense,
1526        0,
1527        MatGetValues_MPIDense,
1528        0,
1529 /*45*/ 0,
1530        MatScale_MPIDense,
1531        0,
1532        0,
1533        0,
1534 /*50*/ 0,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*55*/ 0,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*60*/ MatGetSubMatrix_MPIDense,
1545        MatDestroy_MPIDense,
1546        MatView_MPIDense,
1547        0,
1548        0,
1549 /*65*/ 0,
1550        0,
1551        0,
1552        0,
1553        0,
1554 /*70*/ 0,
1555        0,
1556        0,
1557        0,
1558        0,
1559 /*75*/ 0,
1560        0,
1561        0,
1562        0,
1563        0,
1564 /*80*/ 0,
1565        0,
1566        0,
1567        0,
1568 /*84*/ MatLoad_MPIDense,
1569        0,
1570        0,
1571        0,
1572        0,
1573        0,
1574 /*90*/
1575 #if defined(PETSC_HAVE_PLAPACK)
1576        MatMatMult_MPIDense_MPIDense,
1577        MatMatMultSymbolic_MPIDense_MPIDense,
1578        MatMatMultNumeric_MPIDense_MPIDense,
1579 #else
1580        0,
1581        0,
1582        0,
1583 #endif
1584        0,
1585 /*95*/ 0,
1586        0,
1587        0,
1588        0};
1589 
1590 EXTERN_C_BEGIN
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1593 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1594 {
1595   Mat_MPIDense   *a;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   mat->preallocated = PETSC_TRUE;
1600   /* Note: for now, when data is supplied, this assumes the user has correctly
1601    allocated the local dense storage space; no error checking is performed. */
1602 
1603   a    = (Mat_MPIDense*)mat->data;
1604   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1605   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1606   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1607   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1608   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1609   PetscFunctionReturn(0);
1610 }
1611 EXTERN_C_END
1612 
1613 /*MC
1614    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1615 
1616    Options Database Keys:
1617 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1618 
1619   Level: beginner
1620 
1621   MATMPIDENSE matrices can be factored with parallel direct solvers (LU and
1622   Cholesky) via the external package PLAPACK, if PLAPACK is installed
1623   (run config/configure.py with the option --download-plapack).
1624 
1625 
1626   Options Database Keys:
1627 + -mat_plapack_nprows <n> - number of rows in processor partition
1628 . -mat_plapack_npcols <n> - number of columns in processor partition
1629 . -mat_plapack_nb <n> - block size of template vector
1630 . -mat_plapack_nb_alg <n> - algorithmic block size
1631 - -mat_plapack_ckerror <n> - error checking flag
1632 
1633 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1634 M*/
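/*
   Example (a sketch under the options above; M and N are assumed global sizes):
   creating a distributed dense matrix whose type is chosen at runtime, so that
   -mat_type mpidense and the -mat_plapack_* keys take effect.

       Mat A;

       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
       MatSetFromOptions(A);                        - picks up -mat_type mpidense
       MatMPIDenseSetPreallocation(A,PETSC_NULL);   - PETSc allocates the storage
*/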
1635 
1636 EXTERN_C_BEGIN
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatCreate_MPIDense"
1639 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1640 {
1641   Mat_MPIDense   *a;
1642   PetscErrorCode ierr;
1643 
1644   PetscFunctionBegin;
1645   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1646   mat->data         = (void*)a;
1647   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1648   mat->mapping      = 0;
1649 
1650   mat->insertmode = NOT_SET_VALUES;
1651   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1652   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1653 
1654   mat->rmap->bs = mat->cmap->bs = 1;
1655   ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
1656   ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
1657   a->nvec = mat->cmap->n;
1658 
1659   /* build cache for off array entries formed */
1660   a->donotstash = PETSC_FALSE;
1661   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1662 
1663   /* stuff used for matrix vector multiply */
1664   a->lvec        = 0;
1665   a->Mvctx       = 0;
1666   a->roworiented = PETSC_TRUE;
1667 
1668   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1669                                      "MatGetDiagonalBlock_MPIDense",
1670                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1671   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1672                                      "MatMPIDenseSetPreallocation_MPIDense",
1673                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1674   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1675                                      "MatMatMult_MPIAIJ_MPIDense",
1676                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1677   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1678                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1679                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1680   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1681                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1682                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1683 #if defined(PETSC_HAVE_PLAPACK)
1684   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
1685                                      "MatGetFactor_mpidense_plapack",
1686                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1687   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1688 #endif
1692   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1693 
1694   PetscFunctionReturn(0);
1695 }
1696 EXTERN_C_END
1697 
1698 /*MC
1699    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1700 
1701    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1702    and MATMPIDENSE otherwise.
1703 
1704    Options Database Keys:
1705 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1706 
1707   Level: beginner
1708 
1709 
1710 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1711 M*/
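/*
   Example (sketch): with MATDENSE the constructor below dispatches on the
   communicator size, so the same code yields MATSEQDENSE on one process and
   MATMPIDENSE on several; comm, M, and N are assumed to be defined by the caller.

       Mat A;

       MatCreate(comm,&A);
       MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
       MatSetType(A,MATDENSE);                      - ends up in MatCreate_Dense()
*/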
1712 
1713 EXTERN_C_BEGIN
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatCreate_Dense"
1716 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1717 {
1718   PetscErrorCode ierr;
1719   PetscMPIInt    size;
1720 
1721   PetscFunctionBegin;
1722   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1723   if (size == 1) {
1724     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1725   } else {
1726     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1727   }
1728   PetscFunctionReturn(0);
1729 }
1730 EXTERN_C_END
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1734 /*@C
1735    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1736 
1737    Not collective
1738 
1739    Input Parameters:
1740 +  A - the matrix
1741 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1742    to control all matrix memory allocation.
1743 
1744    Notes:
1745    The dense format is fully compatible with standard Fortran 77
1746    storage by columns.
1747 
1748    The data input variable is intended primarily for Fortran programmers
1749    who wish to allocate their own matrix memory space.  Most users should
1750    set data=PETSC_NULL.
1751 
1752    Level: intermediate
1753 
1754 .keywords: matrix,dense, parallel
1755 
1756 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1757 @*/
1758 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1759 {
1760   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1761 
1762   PetscFunctionBegin;
1763   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1764   if (f) {
1765     ierr = (*f)(mat,data);CHKERRQ(ierr);
1766   }
1767   PetscFunctionReturn(0);
1768 }
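/*
   Example (a hedged sketch of the data argument): a caller providing its own
   local storage, as the Notes above permit. Here m is the local row count and
   N the global column count; the array is column-major with leading dimension m.

       Mat         A;
       PetscScalar *storage;

       PetscMalloc(m*N*sizeof(PetscScalar),&storage);
       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);
       MatSetType(A,MATMPIDENSE);
       MatMPIDenseSetPreallocation(A,storage);
*/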
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatCreateMPIDense"
1772 /*@C
1773    MatCreateMPIDense - Creates a parallel matrix in dense format.
1774 
1775    Collective on MPI_Comm
1776 
1777    Input Parameters:
1778 +  comm - MPI communicator
1779 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1780 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1781 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1782 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1783 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1784    to control all matrix memory allocation.
1785 
1786    Output Parameter:
1787 .  A - the matrix
1788 
1789    Notes:
1790    The dense format is fully compatible with standard Fortran 77
1791    storage by columns.
1792 
1793    The data input variable is intended primarily for Fortran programmers
1794    who wish to allocate their own matrix memory space.  Most users should
1795    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1796 
1797    The user MUST specify either the local or global matrix dimensions
1798    (possibly both).
1799 
1800    Level: intermediate
1801 
1802 .keywords: matrix,dense, parallel
1803 
1804 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1805 @*/
1806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1807 {
1808   PetscErrorCode ierr;
1809   PetscMPIInt    size;
1810 
1811   PetscFunctionBegin;
1812   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1813   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1814   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1815   if (size > 1) {
1816     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1817     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1818   } else {
1819     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1820     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1821   }
1822   PetscFunctionReturn(0);
1823 }
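/*
   Example (sketch; M and N are assumed global dimensions): the common case of
   letting PETSc compute the local sizes and allocate the storage.

       Mat A;

       MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
       ...                                          - set entries with MatSetValues()
       MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
       MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/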
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatDuplicate_MPIDense"
1827 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1828 {
1829   Mat            mat;
1830   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1831   PetscErrorCode ierr;
1832 
1833   PetscFunctionBegin;
1834   *newmat       = 0;
1835   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1836   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1837   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1838   a                 = (Mat_MPIDense*)mat->data;
1839   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1840   mat->factor       = A->factor;
1841   mat->assembled    = PETSC_TRUE;
1842   mat->preallocated = PETSC_TRUE;
1843 
1844   mat->rmap->rstart     = A->rmap->rstart;
1845   mat->rmap->rend       = A->rmap->rend;
1846   a->size              = oldmat->size;
1847   a->rank              = oldmat->rank;
1848   mat->insertmode      = NOT_SET_VALUES;
1849   a->nvec              = oldmat->nvec;
1850   a->donotstash        = oldmat->donotstash;
1851 
1852   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1853   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1854   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1855 
1856   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1857   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1858   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1859 
1860 #if defined(PETSC_HAVE_PLAPACK)
1861   if (A->spptr) {ierr = PetscNewLog(mat,Mat_Plapack,(Mat_Plapack**)&mat->spptr);CHKERRQ(ierr);ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);} /* mat->spptr starts null, so allocate before copying */
1862 #endif
1863   *newmat = mat;
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 #include "petscsys.h"
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1871 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1872 {
1873   PetscErrorCode ierr;
1874   PetscMPIInt    rank,size;
1875   PetscInt       *rowners,i,m,nz,j;
1876   PetscScalar    *array,*vals,*vals_ptr;
1877   MPI_Status     status;
1878 
1879   PetscFunctionBegin;
1880   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1881   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1882 
1883   /* determine ownership of all rows */
1884   m          = M/size + ((M % size) > rank);
1885   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1886   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1887   rowners[0] = 0;
1888   for (i=2; i<=size; i++) {
1889     rowners[i] += rowners[i-1];
1890   }
1891 
1892   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1893   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1894   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1895   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1896   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1897 
1898   if (!rank) {
1899     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1900 
1901     /* read in my part of the matrix numerical values  */
1902     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1903 
1904     /* insert into matrix by row (this is why we cannot read directly into array) */
1905     vals_ptr = vals;
1906     for (i=0; i<m; i++) {
1907       for (j=0; j<N; j++) {
1908         array[i + j*m] = *vals_ptr++;
1909       }
1910     }
1911 
1912     /* read in the other processors' parts and ship them out */
1913     for (i=1; i<size; i++) {
1914       nz   = (rowners[i+1] - rowners[i])*N;
1915       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1916       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1917     }
1918   } else {
1919     /* receive numeric values */
1920     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1921 
1922     /* receive message of values */
1923     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1924 
1925     /* insert into matrix by row (this is why we cannot read directly into array) */
1926     vals_ptr = vals;
1927     for (i=0; i<m; i++) {
1928       for (j=0; j<N; j++) {
1929         array[i + j*m] = *vals_ptr++;
1930       }
1931     }
1932   }
1933   ierr = PetscFree(rowners);CHKERRQ(ierr);
1934   ierr = PetscFree(vals);CHKERRQ(ierr);
       ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr); /* match the MatGetArray() above */
1935   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1936   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1937   PetscFunctionReturn(0);
1938 }
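/*
   Why the element-by-element copy above: the binary file stores each local block
   row by row, while the dense array is Fortran-style column-major, so entry (i,j)
   of an m-by-N block must land at array[i + j*m]. For example, a 2-by-3 block
   stored in the file as a b c d e f ends up as array = {a, d, b, e, c, f}.
*/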
1939 
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatLoad_MPIDense"
1942 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1943 {
1944   Mat            A;
1945   PetscScalar    *vals,*svals;
1946   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1947   MPI_Status     status;
1948   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1949   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1950   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1951   PetscInt       i,nz,j,rstart,rend;
1952   int            fd;
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1957   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1958   if (!rank) {
1959     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1960     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1961     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
1962   }
1963 
1964   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1965   M = header[1]; N = header[2]; nz = header[3];
1966 
1967   /*
1968        Handle case where matrix is stored on disk as a dense matrix
1969   */
1970   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1971     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1972     PetscFunctionReturn(0);
1973   }
1974 
1975   /* determine ownership of all rows */
1976   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1977   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1978   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1979   rowners[0] = 0;
1980   for (i=2; i<=size; i++) {
1981     rowners[i] += rowners[i-1];
1982   }
1983   rstart = rowners[rank];
1984   rend   = rowners[rank+1];
1985 
1986   /* distribute row lengths to all processors */
1987   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1988   offlens = ourlens + (rend-rstart);
1989   if (!rank) {
1990     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1991     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1992     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1993     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1994     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1995     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1996   } else {
1997     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1998   }
1999 
2000   if (!rank) {
2001     /* calculate the number of nonzeros on each processor */
2002     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2003     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2004     for (i=0; i<size; i++) {
2005       for (j=rowners[i]; j< rowners[i+1]; j++) {
2006         procsnz[i] += rowlengths[j];
2007       }
2008     }
2009     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2010 
2011     /* determine max buffer needed and allocate it */
2012     maxnz = 0;
2013     for (i=0; i<size; i++) {
2014       maxnz = PetscMax(maxnz,procsnz[i]);
2015     }
2016     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2017 
2018     /* read in my part of the matrix column indices  */
2019     nz = procsnz[0];
2020     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2021     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2022 
2023     /* read in everyone else's parts and ship them off */
2024     for (i=1; i<size; i++) {
2025       nz   = procsnz[i];
2026       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2027       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2028     }
2029     ierr = PetscFree(cols);CHKERRQ(ierr);
2030   } else {
2031     /* determine buffer space needed for message */
2032     nz = 0;
2033     for (i=0; i<m; i++) {
2034       nz += ourlens[i];
2035     }
2036     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2037 
2038     /* receive message of column indices */
2039     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2040     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2041     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File is corrupt: received message length does not match expected count");
2042   }
2043 
2044   /* loop over local rows, determining number of off-diagonal entries */
2045   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2046   jj = 0;
2047   for (i=0; i<m; i++) {
2048     for (j=0; j<ourlens[i]; j++) {
2049       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2050       jj++;
2051     }
2052   }
2053 
2054   /* create our matrix */
2055   for (i=0; i<m; i++) {
2056     ourlens[i] -= offlens[i];
2057   }
2058   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2059   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2060   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2061   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2062   A = *newmat;
2063   for (i=0; i<m; i++) {
2064     ourlens[i] += offlens[i];
2065   }
2066 
2067   if (!rank) {
2068     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2069 
2070     /* read in my part of the matrix numerical values  */
2071     nz = procsnz[0];
2072     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2073 
2074     /* insert into matrix */
2075     jj      = rstart;
2076     smycols = mycols;
2077     svals   = vals;
2078     for (i=0; i<m; i++) {
2079       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2080       smycols += ourlens[i];
2081       svals   += ourlens[i];
2082       jj++;
2083     }
2084 
2085     /* read in the other processors' parts and ship them out */
2086     for (i=1; i<size; i++) {
2087       nz   = procsnz[i];
2088       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2089       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2090     }
2091     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2092   } else {
2093     /* receive numeric values */
2094     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2095 
2096     /* receive message of values */
2097     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2098     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2099     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File is corrupt: received message length does not match expected count");
2100 
2101     /* insert into matrix */
2102     jj      = rstart;
2103     smycols = mycols;
2104     svals   = vals;
2105     for (i=0; i<m; i++) {
2106       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2107       smycols += ourlens[i];
2108       svals   += ourlens[i];
2109       jj++;
2110     }
2111   }
2112   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2113   ierr = PetscFree(vals);CHKERRQ(ierr);
2114   ierr = PetscFree(mycols);CHKERRQ(ierr);
2115   ierr = PetscFree(rowners);CHKERRQ(ierr);
2116 
2117   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2118   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
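/*
   Usage sketch (the file name matrix.dat is an assumption): MatLoad_MPIDense() is
   reached through MatLoad() when a binary viewer is read in parallel.

       PetscViewer viewer;
       Mat         A;

       PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
       MatLoad(viewer,MATMPIDENSE,&A);
       PetscViewerDestroy(viewer);
*/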
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatEqual_MPIDense"
2124 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2125 {
2126   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2127   Mat            a,b;
2128   PetscTruth     flg;
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   a = matA->A;
2133   b = matB->A;
2134   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2135   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #if defined(PETSC_HAVE_PLAPACK)
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2143 /*@C
2144   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2145   Level: developer
2146 
2147 .keywords: Petsc, destroy, package, PLAPACK
2148 .seealso: PetscFinalize()
2149 @*/
2150 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2151 {
2152   PetscErrorCode ierr;
2153 
2154   PetscFunctionBegin;
2155   ierr = PLA_Finalize();CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2161 /*@C
2162   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2163   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2164 
2165   Input Parameter:
2166 .   comm - the communicator the matrix lives on
2167 
2168   Level: developer
2169 
2170    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects must live in the
2171   same communicator (because some global state is initialized and shared by all matrices). In addition, if
2172   PLAPACK is initialized on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created there),
2173   these subcommunicators cannot overlap.
2174 
2175 .keywords: Petsc, initialize, package, PLAPACK
2176 .seealso: PetscInitializePackage(), PetscInitialize()
2177 @*/
2178 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2179 {
2180   PetscMPIInt    size;
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   if (!PLA_Initialized(PETSC_NULL)) {
2185 
2186     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2187     Plapack_nprows = 1;
2188     Plapack_npcols = size;
2189 
2190     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2191       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2192       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2193 #if defined(PETSC_USE_DEBUG)
2194       Plapack_ierror = 3;
2195 #else
2196       Plapack_ierror = 0;
2197 #endif
2198       ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2199       if (Plapack_ierror) {
2200         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2201       } else {
2202         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2203       }
2204 
2205       Plapack_nb_alg = 0;
2206       ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2207       if (Plapack_nb_alg) {
2208         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2209       }
2210     PetscOptionsEnd();
2211 
2212     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2213     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2214     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2215   }
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 
2220 #endif
2221