xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 300a7f5b1a2fd87c9550c4ae2ba6f40f37aef7e5)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
19               matrix that represents the operator. For sequential matrices it returns itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/*
   MatGetDiagonalBlock_MPIDense - Builds a sequential dense matrix B containing
   this process's diagonal block of the square parallel dense matrix A.

   The new matrix is handed the pointer array+m*rstart into the local storage
   of mdn->A, so B aliases A's data rather than duplicating it.
   NOTE(review): despite the aliasing, *iscopy is reported as PETSC_TRUE --
   confirm the ownership semantics callers expect.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently: the old matrix is simply
     destroyed and a new one built from scratch */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  /* m*rstart is the offset of the diagonal block's first column in the local
     column-major array (the local matrix has m rows) */
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/*
   MatSetValues_MPIDense - Inserts or adds a logically dense block of values.

   Locally owned rows are forwarded immediately to the local sequential matrix;
   rows owned by other processes are placed in the stash (unless the matrix was
   told to ignore off-process entries) and communicated during assembly.
   In column-oriented mode the input block v is m x n in column-major order,
   hence the v+i+j*m addressing.
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscTruth     roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number */
      if (roworiented) {
        /* row-major input: row i of the block is contiguous at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative columns are silently ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: stash it for communication during MatAssemblyBegin/End */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap->N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIDense"
/*
   MatGetSubMatrix_MPIDense - Extracts the submatrix A[isrow,iscol] into B.

   Limitations: the index sets are not validated, and the submatrix must use
   the same row partitioning as the original matrix -- no parallel
   redistribution is performed.  cs is the number of local columns requested
   for a newly created matrix.
*/
static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;

  PetscFunctionBegin;
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense *)newmatd->A->data)->v;

  /* both local arrays are column-major (the source has nlrows rows, so column
     icol[i] starts at v + nlrows*icol[i]); the destination is filled sequentially */
  for (i=0; i<ncols; i++) {
    av = v + nlrows*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* irow[] holds global row numbers */
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
240 
#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
/*
   MatRestoreArray_MPIDense - Companion to MatGetArray_MPIDense().

   Intentionally a no-op: the pointer handed out comes straight from the local
   sequential matrix and nothing needs to be copied back or released here.
   NOTE(review): the matching MatRestoreArray on the inner matrix is never
   called -- presumably harmless for the dense format, but confirm.
*/
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
248 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
/*
   MatAssemblyBegin_MPIDense - Begins communicating stashed off-process entries.

   All processes must agree on the insert mode; the bitwise-OR reduction below
   detects a mix of ADD_VALUES and INSERT_VALUES across the communicator.
*/
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* start shipping stashed entries to their owning processes */
  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
272 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/*
   MatAssemblyEnd_MPIDense - Completes assembly: drains incoming stash messages,
   inserts the received entries locally, assembles the local matrix, and on the
   first final assembly sets up the scatter used by the multiply routines.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break; /* no more incoming messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  /* assemble the local piece */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first-time final assembly: build the scatter used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
/* The code does not handle the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/*
   MatZeroRows_MPIDense - Zeros the (globally numbered, possibly off-process)
   rows listed in rows[], setting diag on the diagonal of each zeroed row.

   Protocol: each process determines the owner of every index it was given,
   sends each owner its list with nonblocking point-to-point messages, receives
   the indices it owns from everyone else, and finally applies MatZeroRows()
   to the local sequential matrix with local row numbers.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values;
  PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  PetscTruth     found;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] counts indices destined for process j; nprocs[2*j+1] is a
     0/1 flag marking whether a message to j is needed at all */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear search of the ownership ranges for the owner of idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* bucket the indices by destination (this pass advances starts[]) */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] since the bucketing pass consumed it */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]; /* first global row owned by this process */

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n; /* total number of rows this process must zero */
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base; /* convert to local row numbers */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatMult_MPIDense"
445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
446 {
447   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
452   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatMultAdd_MPIDense"
459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatMultTranspose_MPIDense"
473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
474 {
475   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
476   PetscErrorCode ierr;
477   PetscScalar    zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(yy,zero);CHKERRQ(ierr);
481   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
482   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
483   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
492   PetscErrorCode ierr;
493 
494   PetscFunctionBegin;
495   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
496   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
497   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
498   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatGetDiagonal_MPIDense"
504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
505 {
506   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
507   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
508   PetscErrorCode ierr;
509   PetscInt       len,i,n,m = A->rmap->n,radd;
510   PetscScalar    *x,zero = 0.0;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(v,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
515   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
516   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
517   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
518   radd = A->rmap->rstart*m;
519   for (i=0; i<len; i++) {
520     x[i] = aloc->v[radd + i*m + i];
521   }
522   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatDestroy_MPIDense"
528 PetscErrorCode MatDestroy_MPIDense(Mat mat)
529 {
530   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
531   PetscErrorCode ierr;
532 #if defined(PETSC_HAVE_PLAPACK)
533   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
534 #endif
535 
536   PetscFunctionBegin;
537 
538 #if defined(PETSC_USE_LOG)
539   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
540 #endif
541   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
542   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
543   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
544   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
545 #if defined(PETSC_HAVE_PLAPACK)
546   if (lu) {
547     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
548     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
549     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
550 
551     if (lu->is_pla) {
552       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
553       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
554       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
555     }
556   }
557 #endif
558 
559   ierr = PetscFree(mdn);CHKERRQ(ierr);
560   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/*
   MatView_MPIDense_Binary - Writes the matrix to a binary viewer in the
   native dense format (only PETSC_VIEWER_BINARY_NATIVE is supported).

   Process 0 writes the header and its own rows, then receives each other
   process's local block in rank order and writes it.  Each local block is
   stored column-major (a->v) and is transposed into row-major order in the
   work array before being written.
*/
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* uniprocessor: the local matrix is the whole matrix */
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_BINARY_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_COOKIE;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m    = mat->rmap->n;
        v    = a->v;
        /* transpose: a->v is column-major, the file stores rows contiguously */
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        /* receive each remaining process's block in rank order and write it */
        for(k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k]; /* rows owned by rank k */
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for(j = 0; j < N; j++) {
            for(i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root ranks just ship their local (column-major) block to rank 0 */
        ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_BINARY_NATIVE");
    }
  }
  PetscFunctionReturn(0);
}
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
650 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
651 {
652   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
653   PetscErrorCode        ierr;
654   PetscMPIInt           size = mdn->size,rank = mdn->rank;
655   const PetscViewerType vtype;
656   PetscTruth            iascii,isdraw;
657   PetscViewer           sviewer;
658   PetscViewerFormat     format;
659 #if defined(PETSC_HAVE_PLAPACK)
660   Mat_Plapack           *lu=(Mat_Plapack*)(mat->spptr);
661 #endif
662 
663   PetscFunctionBegin;
664   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
666   if (iascii) {
667     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
668     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
669     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
670       MatInfo info;
671       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
672       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
673                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
674       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
675       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
676 #if defined(PETSC_HAVE_PLAPACK)
677       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",lu->Plapack_nprows, lu->Plapack_npcols);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->Plapack_ierror);CHKERRQ(ierr);
681       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->Plapack_nb_alg);CHKERRQ(ierr);
682 #endif
683       PetscFunctionReturn(0);
684     } else if (format == PETSC_VIEWER_ASCII_INFO) {
685       PetscFunctionReturn(0);
686     }
687   } else if (isdraw) {
688     PetscDraw  draw;
689     PetscTruth isnull;
690 
691     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
692     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
693     if (isnull) PetscFunctionReturn(0);
694   }
695 
696   if (size == 1) {
697     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
698   } else {
699     /* assemble the entire matrix onto first processor. */
700     Mat         A;
701     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
702     PetscInt    *cols;
703     PetscScalar *vals;
704 
705     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
706     if (!rank) {
707       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
708     } else {
709       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
710     }
711     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
712     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
713     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
714     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
715 
716     /* Copy the matrix ... This isn't the most efficient means,
717        but it's quick for now */
718     A->insertmode = INSERT_VALUES;
719     row = mat->rmap->rstart; m = mdn->A->rmap->n;
720     for (i=0; i<m; i++) {
721       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
722       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
723       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
724       row++;
725     }
726 
727     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
728     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
729     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
730     if (!rank) {
731       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
732     }
733     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
734     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
735     ierr = MatDestroy(A);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIDense"
742 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
743 {
744   PetscErrorCode ierr;
745   PetscTruth     iascii,isbinary,isdraw,issocket;
746 
747   PetscFunctionBegin;
748 
749   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
753 
754   if (iascii || issocket || isdraw) {
755     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
756   } else if (isbinary) {
757     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
758   } else {
759     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
760   }
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetInfo_MPIDense"
766 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
767 {
768   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
769   Mat            mdn = mat->A;
770   PetscErrorCode ierr;
771   PetscReal      isend[5],irecv[5];
772 
773   PetscFunctionBegin;
774   info->rows_global    = (double)A->rmap->N;
775   info->columns_global = (double)A->cmap->N;
776   info->rows_local     = (double)A->rmap->n;
777   info->columns_local  = (double)A->cmap->N;
778   info->block_size     = 1.0;
779   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
780   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
781   isend[3] = info->memory;  isend[4] = info->mallocs;
782   if (flag == MAT_LOCAL) {
783     info->nz_used      = isend[0];
784     info->nz_allocated = isend[1];
785     info->nz_unneeded  = isend[2];
786     info->memory       = isend[3];
787     info->mallocs      = isend[4];
788   } else if (flag == MAT_GLOBAL_MAX) {
789     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
790     info->nz_used      = irecv[0];
791     info->nz_allocated = irecv[1];
792     info->nz_unneeded  = irecv[2];
793     info->memory       = irecv[3];
794     info->mallocs      = irecv[4];
795   } else if (flag == MAT_GLOBAL_SUM) {
796     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
797     info->nz_used      = irecv[0];
798     info->nz_allocated = irecv[1];
799     info->nz_unneeded  = irecv[2];
800     info->memory       = irecv[3];
801     info->mallocs      = irecv[4];
802   }
803   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
804   info->fill_ratio_needed = 0;
805   info->factor_mallocs    = 0;
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatSetOption_MPIDense"
811 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
812 {
813   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817   switch (op) {
818   case MAT_NEW_NONZERO_LOCATIONS:
819   case MAT_NEW_NONZERO_LOCATION_ERR:
820   case MAT_NEW_NONZERO_ALLOCATION_ERR:
821     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
822     break;
823   case MAT_ROW_ORIENTED:
824     a->roworiented = flg;
825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
826     break;
827   case MAT_NEW_DIAGONALS:
828   case MAT_USE_HASH_TABLE:
829     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
830     break;
831   case MAT_IGNORE_OFF_PROC_ENTRIES:
832     a->donotstash = flg;
833     break;
834   case MAT_SYMMETRIC:
835   case MAT_STRUCTURALLY_SYMMETRIC:
836   case MAT_HERMITIAN:
837   case MAT_SYMMETRY_ETERNAL:
838   case MAT_IGNORE_LOWER_TRIANGULAR:
839     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
840     break;
841   default:
842     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
843   }
844   PetscFunctionReturn(0);
845 }
846 
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatDiagonalScale_MPIDense"
850 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
851 {
852   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
853   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
854   PetscScalar    *l,*r,x,*v;
855   PetscErrorCode ierr;
856   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
857 
858   PetscFunctionBegin;
859   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
860   if (ll) {
861     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
862     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
863     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
864     for (i=0; i<m; i++) {
865       x = l[i];
866       v = mat->v + i;
867       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
868     }
869     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
870     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
871   }
872   if (rr) {
873     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
874     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
875     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
876     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
878     for (i=0; i<n; i++) {
879       x = r[i];
880       v = mat->v + i*m;
881       for (j=0; j<m; j++) { (*v++) *= x;}
882     }
883     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
884     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
885   }
886   PetscFunctionReturn(0);
887 }
888 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIDense"
/*
   MatNorm_MPIDense - Computes a norm of a parallel dense matrix.

   Supported types: NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); the 2-norm is not supported.  On a single process the work
   is delegated to the sequential matrix.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v = mat->v;   /* local column-major storage, rmap->n rows by cmap->N columns */

  PetscFunctionBegin;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares of the local entries, then a global sum and square root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
      ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp holds local per-column absolute sums, tmp2 the globally reduced sums */
      PetscReal *tmp,*tmp2;
      ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + A->cmap->N;
      ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *nrm = 0.0;
      v = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so reduce the local row norms with MPI_MAX */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
943 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIDense"
/*
   MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

   With MAT_INITIAL_MATRIX a new matrix is created; with MAT_REUSE_MATRIX and
   *matout == A an in-place transpose is done (square matrices only),
   otherwise the entries are inserted into the caller-supplied *matout.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
  if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
    /* the in-place case also builds a temporary B; its header is copied into A below */
    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* local block is m x n column-major; column j of A becomes part of row j of B */
  m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i; /* global column indices in B owned as rows of A here */
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: move B's contents into A and destroy the temporary header */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
984 
985 #include "petscblaslapack.h"
986 #undef __FUNCT__
987 #define __FUNCT__ "MatScale_MPIDense"
988 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
989 {
990   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
991   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
992   PetscScalar    oalpha = alpha;
993   PetscErrorCode ierr;
994   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
995 
996   PetscFunctionBegin;
997   BLASscal_(&nz,&oalpha,a->v,&one);
998   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1006 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1007 {
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #if defined(PETSC_HAVE_PLAPACK)
1016 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
/*
   MatMPIDenseCopyToPlapack - Copies the entries of the PETSc matrix A into the
   PLAPACK matrix object stored in the context attached to F (F->spptr).
*/
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;  /* NOTE(review): PetscReal even in complex builds -- confirm PLAPACK expects this */

  PetscFunctionBegin;
  /* zero the PLAPACK matrix, then axpy this process's m x M block in at row offset rstart */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;  /* cached for the solve phase */
  PetscFunctionReturn(0);
}
1041 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
/*
   MatMPIDenseCopyFromPlapack - Copies the entries of the PLAPACK matrix object
   stored in the context attached to F (F->spptr) back into the PETSc matrix A.
*/
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;  /* NOTE(review): PetscReal even in complex builds -- confirm PLAPACK expects this */

  PetscFunctionBegin;
  /* zero A, then axpy the global PLAPACK entries into this process's rows */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}
1066 
#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
/*
   MatMatMultNumeric_MPIDense_MPIDense - Computes C = A*B by copying A and B
   into their PLAPACK objects, calling PLA_Gemm, and copying the result back.

   NOTE(review): each matrix is assumed to carry its own Mat_Plapack context in
   spptr (hence CopyToPlapack(A,A) etc.); the code that creates those contexts
   is currently disabled in the symbolic routine -- confirm before enabling.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /* debugging aids:
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA: C = alpha*A*B + beta*C with PLAPACK's conforming constants */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1105 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1106 {
1107   PetscErrorCode ierr;
1108   PetscInt       m=A->rmap->n,n=B->cmap->n;
1109   Mat            Cmat;
1110 
1111   PetscFunctionBegin;
1112   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1113   SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported");
1114   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1115   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1116   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1117   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1118   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   //ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
1120   //ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
1121   //ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
1122   *C = Cmat;
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1128 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1129 {
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   if (scall == MAT_INITIAL_MATRIX){
1134     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1135   }
1136   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatSolve_MPIDense"
1142 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1143 {
1144   MPI_Comm       comm = ((PetscObject)A)->comm;
1145   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1146   PetscErrorCode ierr;
1147   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1148   PetscScalar    *array;
1149   PetscReal      one = 1.0;
1150   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;;
1151   PLA_Obj        v_pla = NULL;
1152   PetscScalar    *loc_buf;
1153   Vec            loc_x;
1154 
1155   PetscFunctionBegin;
1156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1158 
1159   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1160   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1161   PLA_Obj_set_to_zero(v_pla);
1162 
1163   /* Copy b into rhs_pla */
1164   PLA_API_begin();
1165   PLA_Obj_API_open(v_pla);
1166   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1167   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1168   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1169   PLA_Obj_API_close(v_pla);
1170   PLA_API_end();
1171 
1172   if (A->factor == MAT_FACTOR_LU){
1173     /* Apply the permutations to the right hand sides */
1174     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1175 
1176     /* Solve L y = b, overwriting b with y */
1177     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1178 
1179     /* Solve U x = y (=b), overwriting b with x */
1180     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1181   } else { /*MAT_FACTOR_CHOLESKY */
1182     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1183     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1184                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1185   }
1186 
1187   /* Copy PLAPACK x into Petsc vector x  */
1188   PLA_Obj_local_length(v_pla, &loc_m);
1189   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1190   PLA_Obj_local_stride(v_pla, &loc_stride);
1191   /*
1192     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1193   */
1194   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1195   if (!lu->pla_solved){
1196 
1197     PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);
1198     PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);
1199     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1200 
1201     /* Create IS and cts for VecScatterring */
1202     PLA_Obj_local_length(v_pla, &loc_m);
1203     PLA_Obj_local_stride(v_pla, &loc_stride);
1204     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1205     idx_petsc = idx_pla + loc_m;
1206 
1207     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1208     for (i=0; i<loc_m; i+=lu->nb){
1209       j = 0;
1210       while (j < lu->nb && i+j < loc_m){
1211         idx_petsc[i+j] = rstart + j; j++;
1212       }
1213       rstart += size*lu->nb;
1214     }
1215 
1216     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1217 
1218     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1219     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1220     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1221     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1222   }
1223   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225 
1226   /* Free data */
1227   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1228   PLA_Obj_free(&v_pla);
1229 
1230   lu->pla_solved = PETSC_TRUE;
1231   PetscFunctionReturn(0);
1232 }
1233 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
/*
   MatLUFactorNumeric_MPIDense - Numeric LU factorization via PLAPACK: copies
   A into the PLAPACK matrix object and computes P A = L U there.
*/
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    /* refactorization: release the objects left from the previous factorization */
    PLA_Obj_free(&lu->A);
    PLA_Obj_free (&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0)
    SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;

  (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}
1277 
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
/*
   MatCholeskyFactorNumeric_MPIDense - Numeric Cholesky factorization via
   PLAPACK: copies A into the PLAPACK matrix object and factors its lower
   triangle there.  No pivot vector is needed for Cholesky.
*/
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    /* refactorization: release the matrix left from the previous factorization */
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0)
    SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->CleanUpPlapack = PETSC_TRUE;
  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;

  (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}
1319 
#undef __FUNCT__
#define __FUNCT__ "MatFactorSymbolic_Plapack_Private"
/*
   MatFactorSymbolic_Plapack_Private - Common symbolic setup for the PLAPACK
   LU and Cholesky factorizations: reads runtime options, creates the 2D
   process grid, initializes PLAPACK and builds the object distribution
   template in the Mat_Plapack context hanging off (*F)->spptr.
*/
PetscErrorCode MatFactorSymbolic_Plapack_Private(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat            B = *F;
  Mat_Plapack    *lu;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N;
  MPI_Comm       comm=((PetscObject)A)->comm,comm_2d;
  PetscMPIInt    size;
  PetscInt       ierror;

  PetscFunctionBegin;
  lu = (Mat_Plapack*)(B->spptr);

  /* Set default Plapack parameters: 1 x size mesh, block size covering M/size rows */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  lu->nprows = 1; lu->npcols = size;
  ierror = 0;
  lu->nb     = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
  if (ierror){
    PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
  } else {
    PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
  }
  lu->ierror = ierror;

  lu->nb_alg = 0;
  ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
  if (lu->nb_alg){
    pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);
  }
  PetscOptionsEnd();


  /* Create a 2D communicator */
  PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);
  lu->comm_2d = comm_2d;

  /* Initialize PLAPACK */
  PLA_Init(comm_2d);

  /* Create object distribution template */
  lu->templ = NULL;
  PLA_Temp_create(lu->nb, 0, &lu->templ);

  /* Use suggested nb_alg if it is not provided by user */
  if (lu->nb_alg == 0){
    PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);
    pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);
  }

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
  lu->CleanUpPlapack = PETSC_TRUE;
  *F                 = B;
  PetscFunctionReturn(0);
}
1394 
1395 /* Note the Petsc perm permutation is ignored */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1398 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1399 {
1400   PetscErrorCode ierr;
1401   PetscTruth     issymmetric,set;
1402 
1403   PetscFunctionBegin;
1404   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1405   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1406   ierr = MatFactorSymbolic_Plapack_Private(A,info,F);CHKERRQ(ierr);
1407   (*F)->factor = MAT_FACTOR_CHOLESKY;
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 /* Note the Petsc r and c permutations are ignored */
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1414 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1415 {
1416   PetscErrorCode ierr;
1417   PetscInt       M = A->rmap->N;
1418   Mat_Plapack    *lu;
1419 
1420   PetscFunctionBegin;
1421   ierr = MatFactorSymbolic_Plapack_Private(A,info,F);CHKERRQ(ierr);
1422   lu = (Mat_Plapack*)(*F)->spptr;
1423   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1424   (*F)->factor = MAT_FACTOR_LU;
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1430 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1431 {
1432   PetscErrorCode ierr;
1433   Mat_Plapack    *lu;
1434   PetscMPIInt    size;
1435   PetscInt       M;
1436 
1437   PetscFunctionBegin;
1438   /* Create the factorization matrix */
1439   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1440   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1441   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1442   ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
1443   A->spptr = (void*)lu;
1444 
1445   lu = (Mat_Plapack*)(A->spptr);
1446 
1447   /* Set default Plapack parameters */
1448   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1449   lu->nb     = M/size;
1450   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1451 
1452   /* Set runtime options */
1453   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1454     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1455   PetscOptionsEnd();
1456 
1457   /* Create object distribution template */
1458   lu->templ = NULL;
1459   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1460 
1461   /* Set the datatype */
1462 #if defined(PETSC_USE_COMPLEX)
1463   lu->datatype = MPI_DOUBLE_COMPLEX;
1464 #else
1465   lu->datatype = MPI_DOUBLE;
1466 #endif
1467 
1468   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1469 
1470 
1471   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() is called yet */
1472 
1473   if (ftype == MAT_FACTOR_LU) {
1474     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1475     (*F)->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1476     (*F)->ops->solve            = MatSolve_MPIDense;
1477   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1478     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1479     (*F)->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1480     (*F)->ops->solve                  = MatSolve_MPIDense;
1481   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1482 
1483   PetscFunctionReturn(0);
1484 }
1485 #endif
1486 
1487 /* -------------------------------------------------------------------*/
/*
   Operation table for MATMPIDENSE; slot positions follow struct _MatOps in
   the PETSc headers (numeric comments mark every fifth slot, 0 means "not
   implemented").  The PLAPACK-backed entries (solve, factorizations,
   matrix-matrix multiply) are compiled in only when PETSC_HAVE_PLAPACK is set.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
       MatGetRow_MPIDense,
       MatRestoreRow_MPIDense,
       MatMult_MPIDense,
/* 4*/ MatMultAdd_MPIDense,
       MatMultTranspose_MPIDense,
       MatMultTransposeAdd_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatSolve_MPIDense,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIDense,
/*15*/ MatGetInfo_MPIDense,
       MatEqual_MPIDense,
       MatGetDiagonal_MPIDense,
       MatDiagonalScale_MPIDense,
       MatNorm_MPIDense,
/*20*/ MatAssemblyBegin_MPIDense,
       MatAssemblyEnd_MPIDense,
       0,
       MatSetOption_MPIDense,
       MatZeroEntries_MPIDense,
/*25*/ MatZeroRows_MPIDense,
#if defined(PETSC_HAVE_PLAPACK)
       MatLUFactorSymbolic_MPIDense,
       MatLUFactorNumeric_MPIDense,
       MatCholeskyFactorSymbolic_MPIDense,
       MatCholeskyFactorNumeric_MPIDense,
#else
       0,
       0,
       0,
       0,
#endif
/*30*/ MatSetUpPreallocation_MPIDense,
       0,
       0,
       MatGetArray_MPIDense,
       MatRestoreArray_MPIDense,
/*35*/ MatDuplicate_MPIDense,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIDense,
       0,
       MatGetValues_MPIDense,
       0,
/*45*/ 0,
       MatScale_MPIDense,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       0,
       0,
       0,
/*60*/ MatGetSubMatrix_MPIDense,
       MatDestroy_MPIDense,
       MatView_MPIDense,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIDense,
       0,
       0,
       0,
       0,
       0,
/*90*/
#if defined(PETSC_HAVE_PLAPACK)
       MatMatMult_MPIDense_MPIDense,
       MatMatMultSymbolic_MPIDense_MPIDense,
       MatMatMultNumeric_MPIDense_MPIDense,
#else
       0,
       0,
       0,
#endif
       0,
/*95*/ 0,
       0,
       0,
       0};
1604 
1605 EXTERN_C_BEGIN
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1608 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1609 {
1610   Mat_MPIDense   *a;
1611   PetscErrorCode ierr;
1612 
1613   PetscFunctionBegin;
1614   mat->preallocated = PETSC_TRUE;
1615   /* Note:  For now, when data is specified above, this assumes the user correctly
1616    allocates the local dense storage space.  We should add error checking. */
1617 
1618   a    = (Mat_MPIDense*)mat->data;
1619   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1620   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1621   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1622   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1623   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1624   PetscFunctionReturn(0);
1625 }
1626 EXTERN_C_END
1627 
1628 /*MC
1629    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1630 
1631    Options Database Keys:
1632 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1633 
1634   Level: beginner
1635 
  MATMPIDENSE matrices may use direct solvers (LU and Cholesky)
  for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
  (run config/configure.py with the option --download-plapack)
1639 
1640 
1641   Options Database Keys:
+ -mat_plapack_nprows <n> - number of rows in processor partition
1643 . -mat_plapack_npcols <n> - number of columns in processor partition
1644 . -mat_plapack_nb <n> - block size of template vector
1645 . -mat_plapack_nb_alg <n> - algorithmic block size
1646 - -mat_plapack_ckerror <n> - error checking flag
1647 
1648 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1649 M*/
1650 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIDense"
/*
   MatCreate_MPIDense - Constructor installed for MATMPIDENSE.  Fills in the
   operations table, sets up the row/column maps and the stash used to cache
   off-process entries, and composes the type-specific functions on the
   object.  The inner sequential matrix is created later, in
   MatMPIDenseSetPreallocation_MPIDense().
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->mapping      = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  /* dense matrices always use block size 1 */
  mat->rmap->bs = mat->cmap->bs = 1;
  ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* composed functions: looked up by name so that generic code (e.g.
     MatMPIDenseSetPreallocation(), MatMatMult() with an MPIAIJ operand)
     can dispatch to these type-specific implementations */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                      MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                      MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                      MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  /* parallel dense factorizations are provided by the external PLAPACK package */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
                                     "MatGetFactor_mpidense_plapack",
                                      MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
1708 
1709 /*MC
1710    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1711 
1712    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1713    and MATMPIDENSE otherwise.
1714 
1715    Options Database Keys:
1716 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1717 
1718   Level: beginner
1719 
1720 
.seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1722 M*/
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatCreate_Dense"
1727 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1728 {
1729   PetscErrorCode ierr;
1730   PetscMPIInt    size;
1731 
1732   PetscFunctionBegin;
1733   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1734   if (size == 1) {
1735     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1736   } else {
1737     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1738   }
1739   PetscFunctionReturn(0);
1740 }
1741 EXTERN_C_END
1742 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation"
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  mat - the matrix
-  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL.

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

  PetscFunctionBegin;
  /* dispatch to the implementation composed on the object by the matrix
     type's constructor; silently a no-op for types that do not register it */
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1780 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDense"
/*@C
   MatCreateMPIDense - Creates a parallel matrix in dense format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.keywords: matrix,dense, parallel

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a single process fall back to the sequential dense implementation */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatDuplicate_MPIDense"
1838 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1839 {
1840   Mat            mat;
1841   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1842   PetscErrorCode ierr;
1843 
1844   PetscFunctionBegin;
1845   *newmat       = 0;
1846   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1847   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1848   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1849   a                 = (Mat_MPIDense*)mat->data;
1850   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1851   mat->factor       = A->factor;
1852   mat->assembled    = PETSC_TRUE;
1853   mat->preallocated = PETSC_TRUE;
1854 
1855   mat->rmap->rstart     = A->rmap->rstart;
1856   mat->rmap->rend       = A->rmap->rend;
1857   a->size              = oldmat->size;
1858   a->rank              = oldmat->rank;
1859   mat->insertmode      = NOT_SET_VALUES;
1860   a->nvec              = oldmat->nvec;
1861   a->donotstash        = oldmat->donotstash;
1862 
1863   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1864   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1865   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1866 
1867   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1868   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1869   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1870 
1871 #if defined(PETSC_HAVE_PLAPACK)
1872   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1873 #endif
1874   *newmat = mat;
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #include "petscsys.h"
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1882 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1883 {
1884   PetscErrorCode ierr;
1885   PetscMPIInt    rank,size;
1886   PetscInt       *rowners,i,m,nz,j;
1887   PetscScalar    *array,*vals,*vals_ptr;
1888   MPI_Status     status;
1889 
1890   PetscFunctionBegin;
1891   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1892   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1893 
1894   /* determine ownership of all rows */
1895   m          = M/size + ((M % size) > rank);
1896   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1897   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1898   rowners[0] = 0;
1899   for (i=2; i<=size; i++) {
1900     rowners[i] += rowners[i-1];
1901   }
1902 
1903   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1904   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1905   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1906   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1907   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1908 
1909   if (!rank) {
1910     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1911 
1912     /* read in my part of the matrix numerical values  */
1913     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1914 
1915     /* insert into matrix-by row (this is why cannot directly read into array */
1916     vals_ptr = vals;
1917     for (i=0; i<m; i++) {
1918       for (j=0; j<N; j++) {
1919         array[i + j*m] = *vals_ptr++;
1920       }
1921     }
1922 
1923     /* read in other processors and ship out */
1924     for (i=1; i<size; i++) {
1925       nz   = (rowners[i+1] - rowners[i])*N;
1926       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1927       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1928     }
1929   } else {
1930     /* receive numeric values */
1931     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1932 
1933     /* receive message of values*/
1934     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1935 
1936     /* insert into matrix-by row (this is why cannot directly read into array */
1937     vals_ptr = vals;
1938     for (i=0; i<m; i++) {
1939       for (j=0; j<N; j++) {
1940         array[i + j*m] = *vals_ptr++;
1941       }
1942     }
1943   }
1944   ierr = PetscFree(rowners);CHKERRQ(ierr);
1945   ierr = PetscFree(vals);CHKERRQ(ierr);
1946   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1947   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatLoad_MPIDense"
1953 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1954 {
1955   Mat            A;
1956   PetscScalar    *vals,*svals;
1957   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1958   MPI_Status     status;
1959   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1960   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1961   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1962   PetscInt       i,nz,j,rstart,rend;
1963   int            fd;
1964   PetscErrorCode ierr;
1965 
1966   PetscFunctionBegin;
1967   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1968   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1969   if (!rank) {
1970     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1971     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1972     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1973   }
1974 
1975   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1976   M = header[1]; N = header[2]; nz = header[3];
1977 
1978   /*
1979        Handle case where matrix is stored on disk as a dense matrix
1980   */
1981   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1982     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1983     PetscFunctionReturn(0);
1984   }
1985 
1986   /* determine ownership of all rows */
1987   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1988   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1989   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1990   rowners[0] = 0;
1991   for (i=2; i<=size; i++) {
1992     rowners[i] += rowners[i-1];
1993   }
1994   rstart = rowners[rank];
1995   rend   = rowners[rank+1];
1996 
1997   /* distribute row lengths to all processors */
1998   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1999   offlens = ourlens + (rend-rstart);
2000   if (!rank) {
2001     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2002     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2003     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2004     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2005     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2006     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2007   } else {
2008     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2009   }
2010 
2011   if (!rank) {
2012     /* calculate the number of nonzeros on each processor */
2013     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2014     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2015     for (i=0; i<size; i++) {
2016       for (j=rowners[i]; j< rowners[i+1]; j++) {
2017         procsnz[i] += rowlengths[j];
2018       }
2019     }
2020     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2021 
2022     /* determine max buffer needed and allocate it */
2023     maxnz = 0;
2024     for (i=0; i<size; i++) {
2025       maxnz = PetscMax(maxnz,procsnz[i]);
2026     }
2027     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2028 
2029     /* read in my part of the matrix column indices  */
2030     nz = procsnz[0];
2031     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2032     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2033 
2034     /* read in every one elses and ship off */
2035     for (i=1; i<size; i++) {
2036       nz   = procsnz[i];
2037       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2038       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2039     }
2040     ierr = PetscFree(cols);CHKERRQ(ierr);
2041   } else {
2042     /* determine buffer space needed for message */
2043     nz = 0;
2044     for (i=0; i<m; i++) {
2045       nz += ourlens[i];
2046     }
2047     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2048 
2049     /* receive message of column indices*/
2050     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2051     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2052     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2053   }
2054 
2055   /* loop over local rows, determining number of off diagonal entries */
2056   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2057   jj = 0;
2058   for (i=0; i<m; i++) {
2059     for (j=0; j<ourlens[i]; j++) {
2060       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2061       jj++;
2062     }
2063   }
2064 
2065   /* create our matrix */
2066   for (i=0; i<m; i++) {
2067     ourlens[i] -= offlens[i];
2068   }
2069   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2070   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2071   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2072   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2073   A = *newmat;
2074   for (i=0; i<m; i++) {
2075     ourlens[i] += offlens[i];
2076   }
2077 
2078   if (!rank) {
2079     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2080 
2081     /* read in my part of the matrix numerical values  */
2082     nz = procsnz[0];
2083     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2084 
2085     /* insert into matrix */
2086     jj      = rstart;
2087     smycols = mycols;
2088     svals   = vals;
2089     for (i=0; i<m; i++) {
2090       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2091       smycols += ourlens[i];
2092       svals   += ourlens[i];
2093       jj++;
2094     }
2095 
2096     /* read in other processors and ship out */
2097     for (i=1; i<size; i++) {
2098       nz   = procsnz[i];
2099       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2100       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2101     }
2102     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2103   } else {
2104     /* receive numeric values */
2105     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2106 
2107     /* receive message of values*/
2108     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2109     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2110     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2111 
2112     /* insert into matrix */
2113     jj      = rstart;
2114     smycols = mycols;
2115     svals   = vals;
2116     for (i=0; i<m; i++) {
2117       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2118       smycols += ourlens[i];
2119       svals   += ourlens[i];
2120       jj++;
2121     }
2122   }
2123   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2124   ierr = PetscFree(vals);CHKERRQ(ierr);
2125   ierr = PetscFree(mycols);CHKERRQ(ierr);
2126   ierr = PetscFree(rowners);CHKERRQ(ierr);
2127 
2128   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2129   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 #undef __FUNCT__
2134 #define __FUNCT__ "MatEqual_MPIDense"
2135 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2136 {
2137   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2138   Mat            a,b;
2139   PetscTruth     flg;
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   a = matA->A;
2144   b = matB->A;
2145   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2146   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #if defined(PETSC_HAVE_PLAPACK)
2151 
#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKFinalizePackage"
/*@C
  PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
  It is registered with PetscRegisterFinalize() by PetscPLAPACKInitializePackage() and is therefore
  run automatically during PetscFinalize().

  Level: developer

.keywords: Petsc, destroy, package, PLAPACK
.seealso: PetscFinalize(), PetscPLAPACKInitializePackage()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* releases the PLAPACK 2D communicator and internal state set up by PLA_Init() */
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2169 
#undef __FUNCT__
#define __FUNCT__ "PetscPLAPACKInitializePackage"
/*@C
  PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
  called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
  when using static libraries.

  Input Parameter:
. path - The dynamic library path, or PETSC_NULL (currently unused)

  Level: developer

.keywords: Petsc, initialize, package, PLAPACK
.seealso: PetscInitializePackage(), PetscInitialize()
@*/
PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
{
  MPI_Comm       comm = PETSC_COMM_WORLD;
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* initialize at most once per run; the matching cleanup is registered below */
  if (!PLA_Initialized(PETSC_NULL)) {

    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    /* default processor mesh: a single row spanning all processes */
    Plapack_nprows = 1;
    Plapack_npcols = size;

    /* NOTE(review): PetscOptionsInt() takes PetscInt* while the Plapack_*
       globals are PetscMPIInt; this looks mismatched on --with-64-bit-indices
       builds -- TODO confirm */
    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
#if defined(PETSC_USE_DEBUG)
      Plapack_ierror = 3;   /* full PLAPACK error checking by default in debug builds */
#else
      Plapack_ierror = 0;
#endif
      ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_ierror){
	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
      } else {
	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
      }

      /* 0 keeps PLAPACK's built-in default algorithmic block size */
      Plapack_nb_alg = 0;
      ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_nb_alg) {
        ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
      }
    PetscOptionsEnd();

    /* map the linear communicator onto the nprows x npcols mesh and start PLAPACK */
    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2226 
2227 
2228 #endif
2229