xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f65d6f8a1967a3448e084da7cdbad5889413e460)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that stores
19               this process's local portion of the operator. For a MATSEQDENSE matrix, returns the matrix itself.
20 
21     Input Parameter:
22 .      A - the sequential (MATSEQDENSE) or parallel (MATMPIDENSE) dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
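/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (illustrative only, not part of this file).
   The returned Mat is the sequential matrix stored inside the MATMPIDENSE object, so it can be
   passed to any MATSEQDENSE routine and needs no matching "restore" call; the example below
   zeroes only this process's rows of the parallel matrix A.

     Mat Aloc;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatZeroEntries(Aloc);CHKERRQ(ierr);
*/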
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
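/*
   Sketch of how the composed routine above is normally reached, assuming the public
   MatGetDiagonalBlock() interface of this PETSc version (which returns an iscopy flag).
   For MATMPIDENSE the returned block is the square on-diagonal part of the local rows.

     Mat        B;
     PetscTruth iscopy;
     ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ... use B ...
     if (iscopy) {ierr = MatDestroy(B);CHKERRQ(ierr);}
*/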
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
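/*
   Usage sketch (not from this file): values destined for rows owned by another process are
   stashed by the routine above and only communicated during assembly, so every sequence of
   MatSetValues() calls must be closed by MatAssemblyBegin()/MatAssemblyEnd().  The indices are
   global; the row may be owned by any process.

     PetscInt    row = 0,col = 0;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/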
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap->N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190   const PetscInt *irow,*icol;
191   PetscScalar    *av,*bv,*v = lmat->v;
192   Mat            newmat;
193 
194   PetscFunctionBegin;
195   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
196   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
198   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
199 
200   /* No parallel redistribution is currently supported! We should really check each index set
201      to confirm that it is OK.  Currently only a submatrix with the same row partitioning as the
202      original matrix is supported! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to check the row and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<ncols; i++) {
225     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
238   *B = newmat;
239   PetscFunctionReturn(0);
240 }
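/*
   Because no redistribution is done above, the row index set must select exactly the locally
   owned rows.  A hedged sketch of a conforming call through the public interface, assuming the
   MatGetSubMatrix() signature of this PETSc version (with an explicit local column-size
   argument), that A lives on PETSC_COMM_WORLD, and that ncols is a hypothetical number of
   leading columns requested on every process:

     PetscInt rstart,rend;
     IS       isrow,iscol;
     Mat      S;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/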
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreArray_MPIDense"
244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245 {
246   PetscFunctionBegin;
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253 {
254   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
255   MPI_Comm       comm = ((PetscObject)mat)->comm;
256   PetscErrorCode ierr;
257   PetscInt       nstash,reallocs;
258   InsertMode     addv;
259 
260   PetscFunctionBegin;
261   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
262   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
263   if (addv == (ADD_VALUES|INSERT_VALUES)) {
264     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265   }
266   mat->insertmode = addv; /* in case this processor had no cache */
267 
268   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
269   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
270   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
279   PetscErrorCode  ierr;
280   PetscInt        i,*row,*col,flg,j,rstart,ncols;
281   PetscMPIInt     n;
282   PetscScalar     *val;
283   InsertMode      addv=mat->insertmode;
284 
285   PetscFunctionBegin;
286   /*  wait on receives */
287   while (1) {
288     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
289     if (!flg) break;
290 
291     for (i=0; i<n;) {
292       /* Now identify the consecutive vals belonging to the same row */
293       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294       if (j < n) ncols = j-i;
295       else       ncols = n-i;
296       /* Now assemble all these values with a single function call */
297       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
298       i = j;
299     }
300   }
301   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
302 
303   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
304   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
305 
306   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatZeroEntries_MPIDense"
314 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315 {
316   PetscErrorCode ierr;
317   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
318 
319   PetscFunctionBegin;
320   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 /* the code does not handle the diagonal entries correctly unless the
325    matrix is square and the column and row ownerships are identical.
326    This is a BUG. The only way to fix it seems to be to access
327    the local matrix mdn->A directly and not go through the MatZeroRows()
328    routine.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatZeroRows_MPIDense"
332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333 {
334   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
335   PetscErrorCode ierr;
336   PetscInt       i,*owners = A->rmap->range;
337   PetscInt       *nprocs,j,idx,nsends;
338   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
339   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340   PetscInt       *lens,*lrows,*values;
341   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
342   MPI_Comm       comm = ((PetscObject)A)->comm;
343   MPI_Request    *send_waits,*recv_waits;
344   MPI_Status     recv_status,*send_status;
345   PetscTruth     found;
346 
347   PetscFunctionBegin;
348   /*  first count number of contributors to each processor */
349   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
350   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
352   for (i=0; i<N; i++) {
353     idx = rows[i];
354     found = PETSC_FALSE;
355     for (j=0; j<size; j++) {
356       if (idx >= owners[j] && idx < owners[j+1]) {
357         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358       }
359     }
360     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361   }
362   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
363 
364   /* inform other processors of number of messages and max length*/
365   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
366 
367   /* post receives:   */
368   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
369   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
370   for (i=0; i<nrecvs; i++) {
371     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
372   }
373 
374   /* do sends:
375       1) starts[i] gives the starting index in svalues of the values going to
376          the ith processor
377   */
378   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
379   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
380   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
381   starts[0]  = 0;
382   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383   for (i=0; i<N; i++) {
384     svalues[starts[owner[i]]++] = rows[i];
385   }
386 
387   starts[0] = 0;
388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389   count = 0;
390   for (i=0; i<size; i++) {
391     if (nprocs[2*i+1]) {
392       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
393     }
394   }
395   ierr = PetscFree(starts);CHKERRQ(ierr);
396 
397   base = owners[rank];
398 
399   /*  wait on receives */
400   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
401   source = lens + nrecvs;
402   count  = nrecvs; slen = 0;
403   while (count) {
404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
405     /* unpack receives into our local space */
406     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
407     source[imdex]  = recv_status.MPI_SOURCE;
408     lens[imdex]    = n;
409     slen += n;
410     count--;
411   }
412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
413 
414   /* convert the received global row indices into local row indices */
415   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
416   count = 0;
417   for (i=0; i<nrecvs; i++) {
418     values = rvalues + i*nmax;
419     for (j=0; j<lens[i]; j++) {
420       lrows[count++] = values[j] - base;
421     }
422   }
423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
424   ierr = PetscFree(lens);CHKERRQ(ierr);
425   ierr = PetscFree(owner);CHKERRQ(ierr);
426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
427 
428   /* actually zero out the local rows */
429   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
430   ierr = PetscFree(lrows);CHKERRQ(ierr);
431 
432   /* wait on sends */
433   if (nsends) {
434     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
435     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
436     ierr = PetscFree(send_status);CHKERRQ(ierr);
437   }
438   ierr = PetscFree(send_waits);CHKERRQ(ierr);
439   ierr = PetscFree(svalues);CHKERRQ(ierr);
440 
441   PetscFunctionReturn(0);
442 }
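/*
   Usage sketch for the routine above through the public MatZeroRows() interface (illustrative
   only): each process may list rows owned by any process, because the communication above
   routes every index to its owner before the local rows are zeroed.  diag is the value placed
   on the diagonal of each zeroed row.

     PetscInt    rows[2] = {0,5};
     PetscScalar diag    = 1.0;
     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/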
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMult_MPIDense"
446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447 {
448   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
449   PetscErrorCode ierr;
450 
451   PetscFunctionBegin;
452   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
454   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMultAdd_MPIDense"
460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
468   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTranspose_MPIDense"
474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475 {
476   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
477   PetscErrorCode ierr;
478   PetscScalar    zero = 0.0;
479 
480   PetscFunctionBegin;
481   ierr = VecSet(yy,zero);CHKERRQ(ierr);
482   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
483   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
497   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
498   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
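/*
   The four routines above compute y = A x, z = y + A x, y = A^T x and z = y + A^T x; the
   scatter mdn->Mvctx moves the length-N column-space vector to (or from) every process,
   since each process owns full rows of the dense matrix.  A minimal usage sketch with
   conforming vector layouts:

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/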
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "MatGetDiagonal_MPIDense"
505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506 {
507   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
508   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
509   PetscErrorCode ierr;
510   PetscInt       len,i,n,m = A->rmap->n,radd;
511   PetscScalar    *x,zero = 0.0;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(v,zero);CHKERRQ(ierr);
515   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
516   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
517   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
519   radd = A->rmap->rstart*m;
520   for (i=0; i<len; i++) {
521     x[i] = aloc->v[radd + i*m + i];
522   }
523   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatDestroy_MPIDense"
529 PetscErrorCode MatDestroy_MPIDense(Mat mat)
530 {
531   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
532   PetscErrorCode ierr;
533 #if defined(PETSC_HAVE_PLAPACK)
534   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
535 #endif
536 
537   PetscFunctionBegin;
538 
539 #if defined(PETSC_USE_LOG)
540   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541 #endif
542   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
543   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
544   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
545   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
546 #if defined(PETSC_HAVE_PLAPACK)
547   if (lu) {
548     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
549     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
550     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
551 
552     if (lu->is_pla) {
553       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
554       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
555       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
556     }
557   }
558 #endif
559 
560   ierr = PetscFree(mdn);CHKERRQ(ierr);
561   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_MPIDense_Binary"
572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573 {
574   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
575   PetscErrorCode    ierr;
576   PetscViewerFormat format;
577   int               fd;
578   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
579   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
580   PetscScalar       *work,*v,*vv;
581   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
582   MPI_Status        status;
583 
584   PetscFunctionBegin;
585   if (mdn->size == 1) {
586     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
587   } else {
588     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
589     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
590     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
591 
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_NATIVE) {
594 
595       if (!rank) {
596         /* store the matrix as a dense matrix */
597         header[0] = MAT_FILE_COOKIE;
598         header[1] = mat->rmap->N;
599         header[2] = N;
600         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
602 
603         /* get largest work array needed for transposing array */
604         mmax = mat->rmap->n;
605         for (i=1; i<size; i++) {
606           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607         }
608         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
609 
610         /* write out local array, by rows */
611         m    = mat->rmap->n;
612         v    = a->v;
613         for (j=0; j<N; j++) {
614           for (i=0; i<m; i++) {
615             work[j + i*N] = *v++;
616           }
617         }
618         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
619         /* get largest work array to receive messages from other processes, excludes process zero */
620         mmax = 0;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
625         for(k = 1; k < size; k++) {
626           v    = vv;
627           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
629 
630           for(j = 0; j < N; j++) {
631             for(i = 0; i < m; i++) {
632               work[j + i*N] = *v++;
633             }
634           }
635           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         }
637         ierr = PetscFree(work);CHKERRQ(ierr);
638         ierr = PetscFree(vv);CHKERRQ(ierr);
639       } else {
640         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
641       }
642     } else {
643       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
644     }
645   }
646   PetscFunctionReturn(0);
647 }
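/*
   As the error above indicates, storing a parallel dense matrix in binary requires the native
   format.  A hedged sketch of the intended call sequence (the file name is illustrative):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/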
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652 {
653   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
654   PetscErrorCode        ierr;
655   PetscMPIInt           size = mdn->size,rank = mdn->rank;
656   const PetscViewerType vtype;
657   PetscTruth            iascii,isdraw;
658   PetscViewer           sviewer;
659   PetscViewerFormat     format;
660 #if defined(PETSC_HAVE_PLAPACK)
661   Mat_Plapack           *lu;
662 #endif
663 
664   PetscFunctionBegin;
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676 #if defined(PETSC_HAVE_PLAPACK)
677       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
681       if (mat->factor){
682         lu=(Mat_Plapack*)(mat->spptr);
683         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
684       }
685 #else
686       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
687 #endif
688       PetscFunctionReturn(0);
689     } else if (format == PETSC_VIEWER_ASCII_INFO) {
690       PetscFunctionReturn(0);
691     }
692   } else if (isdraw) {
693     PetscDraw  draw;
694     PetscTruth isnull;
695 
696     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
697     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
698     if (isnull) PetscFunctionReturn(0);
699   }
700 
701   if (size == 1) {
702     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
703   } else {
704     /* assemble the entire matrix onto first processor. */
705     Mat         A;
706     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
707     PetscInt    *cols;
708     PetscScalar *vals;
709 
710     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
713     } else {
714       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
715     }
716     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)mat)->type_name here is acceptable. */
717     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
718     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
719     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
720 
721     /* Copy the matrix ... This isn't the most efficient means,
722        but it's quick for now */
723     A->insertmode = INSERT_VALUES;
724     row = mat->rmap->rstart; m = mdn->A->rmap->n;
725     for (i=0; i<m; i++) {
726       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
727       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
728       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
729       row++;
730     }
731 
732     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
733     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
734     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     if (!rank) {
736       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
737     }
738     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
739     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
740     ierr = MatDestroy(A);CHKERRQ(ierr);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatView_MPIDense"
747 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
748 {
749   PetscErrorCode ierr;
750   PetscTruth     iascii,isbinary,isdraw,issocket;
751 
752   PetscFunctionBegin;
753 
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
755   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
756   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
757   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
758 
759   if (iascii || issocket || isdraw) {
760     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
761   } else if (isbinary) {
762     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
763   } else {
764     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "MatGetInfo_MPIDense"
771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772 {
773   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
774   Mat            mdn = mat->A;
775   PetscErrorCode ierr;
776   PetscReal      isend[5],irecv[5];
777 
778   PetscFunctionBegin;
779   info->block_size     = 1.0;
780   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
781   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;  isend[4] = info->mallocs;
783   if (flag == MAT_LOCAL) {
784     info->nz_used      = isend[0];
785     info->nz_allocated = isend[1];
786     info->nz_unneeded  = isend[2];
787     info->memory       = isend[3];
788     info->mallocs      = isend[4];
789   } else if (flag == MAT_GLOBAL_MAX) {
790     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
791     info->nz_used      = irecv[0];
792     info->nz_allocated = irecv[1];
793     info->nz_unneeded  = irecv[2];
794     info->memory       = irecv[3];
795     info->mallocs      = irecv[4];
796   } else if (flag == MAT_GLOBAL_SUM) {
797     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
798     info->nz_used      = irecv[0];
799     info->nz_allocated = irecv[1];
800     info->nz_unneeded  = irecv[2];
801     info->memory       = irecv[3];
802     info->mallocs      = irecv[4];
803   }
804   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
805   info->fill_ratio_needed = 0;
806   info->factor_mallocs    = 0;
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetOption_MPIDense"
812 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
813 {
814   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_ROW_ORIENTED:
825     a->roworiented = flg;
826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
827     break;
828   case MAT_NEW_DIAGONALS:
829   case MAT_USE_HASH_TABLE:
830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
831     break;
832   case MAT_IGNORE_OFF_PROC_ENTRIES:
833     a->donotstash = flg;
834     break;
835   case MAT_SYMMETRIC:
836   case MAT_STRUCTURALLY_SYMMETRIC:
837   case MAT_HERMITIAN:
838   case MAT_SYMMETRY_ETERNAL:
839   case MAT_IGNORE_LOWER_TRIANGULAR:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844   }
845   PetscFunctionReturn(0);
846 }
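/*
   Example of the two options this routine actually acts on (a sketch, not from this file):
   MAT_ROW_ORIENTED toggles the roworiented flag used by MatSetValues_MPIDense(), and
   MAT_IGNORE_OFF_PROC_ENTRIES sets donotstash so off-process entries are dropped instead of
   being stashed and communicated at assembly.

     ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/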
847 
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) { (*v++) *= x;}
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
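/*
   Usage sketch: MatDiagonalScale(A,ll,rr) forms diag(ll)*A*diag(rr).  As checked above, ll must
   share the row layout of A and rr the column layout (rr is scattered into mdn->lvec).  Vectors
   with the required layouts can be obtained from the matrix itself, e.g.:

     Vec ll,rr;
     ierr = MatGetVecs(A,&rr,&ll);CHKERRQ(ierr);
     ierr = VecSet(ll,2.0);CHKERRQ(ierr);
     ierr = VecSet(rr,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/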
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatNorm_MPIDense"
892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893 {
894   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
896   PetscErrorCode ierr;
897   PetscInt       i,j;
898   PetscReal      sum = 0.0;
899   PetscScalar    *v = mat->v;
900 
901   PetscFunctionBegin;
902   if (mdn->size == 1) {
903     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
904   } else {
905     if (type == NORM_FROBENIUS) {
906       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
907 #if defined(PETSC_USE_COMPLEX)
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909 #else
910         sum += (*v)*(*v); v++;
911 #endif
912       }
913       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
914       *nrm = sqrt(*nrm);
915       ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
916     } else if (type == NORM_1) {
917       PetscReal *tmp,*tmp2;
918       ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
919       tmp2 = tmp + A->cmap->N;
920       ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
921       *nrm = 0.0;
922       v = mat->v;
923       for (j=0; j<mdn->A->cmap->n; j++) {
924         for (i=0; i<mdn->A->rmap->n; i++) {
925           tmp[j] += PetscAbsScalar(*v);  v++;
926         }
927       }
928       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       for (j=0; j<A->cmap->N; j++) {
930         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931       }
932       ierr = PetscFree(tmp);CHKERRQ(ierr);
933       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
934     } else if (type == NORM_INFINITY) { /* max row norm */
935       PetscReal ntemp;
936       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
937       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
938     } else {
939       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_MPIDense"
947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948 {
949   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951   Mat            B;
952   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
953   PetscErrorCode ierr;
954   PetscInt       j,i;
955   PetscScalar    *v;
956 
957   PetscFunctionBegin;
958   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
959   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
960     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
961     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
963     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
964   } else {
965     B = *matout;
966   }
967 
968   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
969   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
970   for (i=0; i<m; i++) rwork[i] = rstart + i;
971   for (j=0; j<n; j++) {
972     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
973     v   += m;
974   }
975   ierr = PetscFree(rwork);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
979     *matout = B;
980   } else {
981     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
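/*
   As the check above enforces, in-place transposition (MAT_REUSE_MATRIX with *matout == A) is
   supported only for square matrices; out-of-place use has no such restriction, e.g.:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/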
985 
986 #include "petscblaslapack.h"
987 #undef __FUNCT__
988 #define __FUNCT__ "MatScale_MPIDense"
989 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990 {
991   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
992   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
993   PetscScalar    oalpha = alpha;
994   PetscErrorCode ierr;
995   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
996 
997   PetscFunctionBegin;
998   BLASscal_(&nz,&oalpha,a->v,&one);
999   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1007 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008 {
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #if defined(PETSC_HAVE_PLAPACK)
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1020 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1021 {
1022   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1023   PetscErrorCode ierr;
1024   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1025   PetscScalar    *array;
1026   PetscReal      one = 1.0;
1027 
1028   PetscFunctionBegin;
1029   /* Copy A into F->lu->A */
1030   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1031   ierr = PLA_API_begin();CHKERRQ(ierr);
1032   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1034   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1035   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1036   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1037   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1038   ierr = PLA_API_end();CHKERRQ(ierr);
1039   lu->rstart = rstart;
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1045 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1046 {
1047   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1048   PetscErrorCode ierr;
1049   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1050   PetscScalar    *array;
1051   PetscReal      one = 1.0;
1052 
1053   PetscFunctionBegin;
1054   /* Copy the PLAPACK matrix lu->A (held by F) back into A */
1055   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1056   ierr = PLA_API_begin();CHKERRQ(ierr);
1057   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1058   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1059   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1060   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1061   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1062   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1063   ierr = PLA_API_end();CHKERRQ(ierr);
1064   lu->rstart = rstart;
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1070 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1071 {
1072   PetscErrorCode ierr;
1073   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1074   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1075   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1076   PLA_Obj        alpha = NULL,beta = NULL;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1080   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1081 
1082   /*
1083   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1084   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1085   */
1086 
1087   /* do the multiply in PLA  */
1088   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1089   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1090   CHKMEMQ;
1091 
1092   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1093   CHKMEMQ;
1094   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1095   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1096 
1097   /*
1098   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1099   */
1100   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1106 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1107 {
1108   PetscErrorCode ierr;
1109   PetscInt       m=A->rmap->n,n=B->cmap->n;
1110   Mat            Cmat;
1111 
1112   PetscFunctionBegin;
1113   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1114   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1115   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1116   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1117   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1118   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1120 
1121   *C = Cmat;
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1127 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1128 {
1129   PetscErrorCode ierr;
1130 
1131   PetscFunctionBegin;
1132   if (scall == MAT_INITIAL_MATRIX){
1133     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1134   }
1135   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSolve_MPIDense"
1141 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1142 {
1143   MPI_Comm       comm = ((PetscObject)A)->comm;
1144   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1145   PetscErrorCode ierr;
1146   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1147   PetscScalar    *array;
1148   PetscReal      one = 1.0;
1149   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1150   PLA_Obj        v_pla = NULL;
1151   PetscScalar    *loc_buf;
1152   Vec            loc_x;
1153 
1154   PetscFunctionBegin;
1155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1157 
1158   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1159   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1160   PLA_Obj_set_to_zero(v_pla);
1161 
1162   /* Copy b into the PLAPACK vector v_pla */
1163   PLA_API_begin();
1164   PLA_Obj_API_open(v_pla);
1165   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1166   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1167   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1168   PLA_Obj_API_close(v_pla);
1169   PLA_API_end();
1170 
1171   if (A->factor == MAT_FACTOR_LU){
1172     /* Apply the permutations to the right hand sides */
1173     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1174 
1175     /* Solve L y = b, overwriting b with y */
1176     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1177 
1178     /* Solve U x = y (=b), overwriting b with x */
1179     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1180   } else { /* MAT_FACTOR_CHOLESKY */
1181     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1182     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1183                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1184   }
1185 
1186   /* Copy PLAPACK x into Petsc vector x  */
1187   PLA_Obj_local_length(v_pla, &loc_m);
1188   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1189   PLA_Obj_local_stride(v_pla, &loc_stride);
1190   /*
1191     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1192   */
1193   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1194   if (!lu->pla_solved){
1195 
1196     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1197     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1198 
1199     /* Create the index sets and scatter context for the VecScatter */
1200     PLA_Obj_local_length(v_pla, &loc_m);
1201     PLA_Obj_local_stride(v_pla, &loc_stride);
1202     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1203     idx_petsc = idx_pla + loc_m;
1204 
1205     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1206     for (i=0; i<loc_m; i+=lu->nb){
1207       j = 0;
1208       while (j < lu->nb && i+j < loc_m){
1209         idx_petsc[i+j] = rstart + j; j++;
1210       }
1211       rstart += size*lu->nb;
1212     }
1213 
1214     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1215 
1216     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1217     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1218     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1219     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1220   }
1221   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224   /* Free data */
1225   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1226   PLA_Obj_free(&v_pla);
1227 
1228   lu->pla_solved = PETSC_TRUE;
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1234 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1235 {
1236   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1237   PetscErrorCode ierr;
1238   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1239   PetscInt       info_pla=0;
1240   PetscScalar    *array,one = 1.0;
1241 
1242   PetscFunctionBegin;
1243   if (lu->mstruct == SAME_NONZERO_PATTERN){
1244     PLA_Obj_free(&lu->A);
1245     PLA_Obj_free (&lu->pivots);
1246   }
1247   /* Create PLAPACK matrix object */
1248   lu->A = NULL; lu->pivots = NULL;
1249   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1250   PLA_Obj_set_to_zero(lu->A);
1251   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1252 
1253   /* Copy A into lu->A */
1254   PLA_API_begin();
1255   PLA_Obj_API_open(lu->A);
1256   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1257   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1258   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1259   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1260   PLA_Obj_API_close(lu->A);
1261   PLA_API_end();
1262 
1263   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1264   info_pla = PLA_LU(lu->A,lu->pivots);
1265   if (info_pla != 0)
1266     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1267 
1268   lu->rstart         = rstart;
1269   lu->mstruct        = SAME_NONZERO_PATTERN;
1270   F->ops->solve      = MatSolve_MPIDense;
1271   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1277 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1278 {
1279   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1280   PetscErrorCode ierr;
1281   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1282   PetscInt       info_pla=0;
1283   PetscScalar    *array,one = 1.0;
1284 
1285   PetscFunctionBegin;
1286   if (lu->mstruct == SAME_NONZERO_PATTERN){
1287     PLA_Obj_free(&lu->A);
1288   }
1289   /* Create PLAPACK matrix object */
1290   lu->A      = NULL;
1291   lu->pivots = NULL;
1292   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1293 
1294   /* Copy A into lu->A */
1295   PLA_API_begin();
1296   PLA_Obj_API_open(lu->A);
1297   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1298   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1299   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1300   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1301   PLA_Obj_API_close(lu->A);
1302   PLA_API_end();
1303 
1304   /* Factor A -> L L^T (Cholesky), overwriting the lower triangle of A with L */
1305   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1306   if (info_pla != 0)
1307     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1308 
1309   lu->rstart         = rstart;
1310   lu->mstruct        = SAME_NONZERO_PATTERN;
1311   F->ops->solve      = MatSolve_MPIDense;
1312   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 /* Note: the PETSc permutation perm is ignored */
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1319 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1320 {
1321   PetscErrorCode ierr;
1322   PetscTruth     issymmetric,set;
1323 
1324   PetscFunctionBegin;
1325   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1326   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1327   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1328   PetscFunctionReturn(0);
1329 }
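/*
   The symmetry check above means a PLAPACK Cholesky factorization must be preceded by flagging
   the matrix symmetric.  A hedged sketch of the expected call sequence (whether PETSC_NULL is
   accepted for the ignored permutation depends on the generic interface checks):

     Mat           F;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_CHOLESKY,&F);CHKERRQ(ierr);
     ierr = MatCholeskyFactorSymbolic(F,A,PETSC_NULL,&info);CHKERRQ(ierr);
     ierr = MatCholeskyFactorNumeric(F,A,&info);CHKERRQ(ierr);
*/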
1330 
1331 /* Note: the PETSc row and column permutations r and c are ignored */
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1334 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1335 {
1336   PetscErrorCode ierr;
1337   PetscInt       M = A->rmap->N;
1338   Mat_Plapack    *lu;
1339 
1340   PetscFunctionBegin;
1341   lu = (Mat_Plapack*)F->spptr;
1342   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1343   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 EXTERN_C_BEGIN
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1350 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1351 {
1352   PetscFunctionBegin;
1353   *type = MAT_SOLVER_PLAPACK;
1354   PetscFunctionReturn(0);
1355 }
1356 EXTERN_C_END
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1360 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1361 {
1362   PetscErrorCode ierr;
1363   Mat_Plapack    *lu;
1364   PetscMPIInt    size;
1365   PetscInt       M=A->rmap->N;
1366 
1367   PetscFunctionBegin;
1368   /* Create the factorization matrix */
1369   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1370   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1371   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1372   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1373   (*F)->spptr = (void*)lu;
1374 
1375   /* Set default Plapack parameters */
1376   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1377   lu->nb = M/size;
1378   if (M - lu->nb*size) lu->nb++; /* round up so the blocks cover all rows (no cyclic distribution) */
1379 
1380   /* Set runtime options */
1381   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1382     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1383   PetscOptionsEnd();
1384 
1385   /* Create object distribution template */
1386   lu->templ = NULL;
1387   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1388 
1389   /* Set the datatype */
1390 #if defined(PETSC_USE_COMPLEX)
1391   lu->datatype = MPI_DOUBLE_COMPLEX;
1392 #else
1393   lu->datatype = MPI_DOUBLE;
1394 #endif
1395 
1396   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1397 
1398 
1399   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1400   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1401 
1402   if (ftype == MAT_FACTOR_LU) {
1403     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1404   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1405     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1406   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1407   (*F)->factor = ftype;
1408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 #endif
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1415 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1416 {
1417 #if defined(PETSC_HAVE_PLAPACK)
1418   PetscErrorCode ierr;
1419 #endif
1420 
1421   PetscFunctionBegin;
1422 #if defined(PETSC_HAVE_PLAPACK)
1423   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1424 #else
1425   SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1426 #endif
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatAXPY_MPIDense"
1432 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1433 {
1434   PetscErrorCode ierr;
1435   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1436 
1437   PetscFunctionBegin;
1438   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 /* -------------------------------------------------------------------*/
1443 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1444        MatGetRow_MPIDense,
1445        MatRestoreRow_MPIDense,
1446        MatMult_MPIDense,
1447 /* 4*/ MatMultAdd_MPIDense,
1448        MatMultTranspose_MPIDense,
1449        MatMultTransposeAdd_MPIDense,
1450        0,
1451        0,
1452        0,
1453 /*10*/ 0,
1454        0,
1455        0,
1456        0,
1457        MatTranspose_MPIDense,
1458 /*15*/ MatGetInfo_MPIDense,
1459        MatEqual_MPIDense,
1460        MatGetDiagonal_MPIDense,
1461        MatDiagonalScale_MPIDense,
1462        MatNorm_MPIDense,
1463 /*20*/ MatAssemblyBegin_MPIDense,
1464        MatAssemblyEnd_MPIDense,
1465        0,
1466        MatSetOption_MPIDense,
1467        MatZeroEntries_MPIDense,
1468 /*25*/ MatZeroRows_MPIDense,
1469        0,
1470        0,
1471        0,
1472        0,
1473 /*30*/ MatSetUpPreallocation_MPIDense,
1474        0,
1475        0,
1476        MatGetArray_MPIDense,
1477        MatRestoreArray_MPIDense,
1478 /*35*/ MatDuplicate_MPIDense,
1479        0,
1480        0,
1481        0,
1482        0,
1483 /*40*/ MatAXPY_MPIDense,
1484        MatGetSubMatrices_MPIDense,
1485        0,
1486        MatGetValues_MPIDense,
1487        0,
1488 /*45*/ 0,
1489        MatScale_MPIDense,
1490        0,
1491        0,
1492        0,
1493 /*50*/ 0,
1494        0,
1495        0,
1496        0,
1497        0,
1498 /*55*/ 0,
1499        0,
1500        0,
1501        0,
1502        0,
1503 /*60*/ MatGetSubMatrix_MPIDense,
1504        MatDestroy_MPIDense,
1505        MatView_MPIDense,
1506        0,
1507        0,
1508 /*65*/ 0,
1509        0,
1510        0,
1511        0,
1512        0,
1513 /*70*/ 0,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*75*/ 0,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*80*/ 0,
1524        0,
1525        0,
1526        0,
1527 /*84*/ MatLoad_MPIDense,
1528        0,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*90*/
1534 #if defined(PETSC_HAVE_PLAPACK)
1535        MatMatMult_MPIDense_MPIDense,
1536        MatMatMultSymbolic_MPIDense_MPIDense,
1537        MatMatMultNumeric_MPIDense_MPIDense,
1538 #else
1539        0,
1540        0,
1541        0,
1542 #endif
1543        0,
1544 /*95*/ 0,
1545        0,
1546        0,
1547        0};
1548 
1549 EXTERN_C_BEGIN
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1552 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1553 {
1554   Mat_MPIDense   *a;
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   mat->preallocated = PETSC_TRUE;
1559   /* Note: For now, when data is non-null this assumes the user has correctly
1560    allocated the local dense storage space; we should add error checking. */
1561 
1562   a    = (Mat_MPIDense*)mat->data;
1563   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1564   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1565   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1566   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1567   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570 EXTERN_C_END
1571 
1572 /*MC
1573    MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1574 
1575   Run config/configure.py with the option --download-plapack to install PLAPACK and enable this solver.
1576 
1577 
1578   Options Database Keys:
1579 + -mat_plapack_nprows <n> - number of rows in processor partition
1580 . -mat_plapack_npcols <n> - number of columns in processor partition
1581 . -mat_plapack_nb <n> - block size of template vector
1582 . -mat_plapack_nb_alg <n> - algorithmic block size
1583 - -mat_plapack_ckerror <n> - error checking flag
1584 
1585 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1586 
1587 M*/
1588 
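/*
   An illustrative usage sketch (not part of the original source): selecting the PLAPACK
   factorization for a parallel dense operator through the PC interface, assuming a KSP
   whose operator is MATMPIDENSE has already been set up. The option names come from the
   manual page above; the run-time solver name "plapack" is assumed here.

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = PCFactorSetSolverPackage(pc,MAT_SOLVER_PLAPACK);CHKERRQ(ierr);

   or, equivalently, from the command line:

     -pc_type lu -pc_factor_mat_solver_package plapack -mat_plapack_nb 64
*/
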
1589 EXTERN_C_BEGIN
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatCreate_MPIDense"
1592 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1593 {
1594   Mat_MPIDense   *a;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1599   mat->data         = (void*)a;
1600   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1601   mat->mapping      = 0;
1602 
1603   mat->insertmode = NOT_SET_VALUES;
1604   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1605   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1606 
1607   ierr = PetscMapSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1608   ierr = PetscMapSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1609   ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
1610   ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
1611   a->nvec = mat->cmap->n;
1612 
1613   /* build cache for off-processor entries formed during MatSetValues() */
1614   a->donotstash = PETSC_FALSE;
1615   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1616 
1617   /* stuff used for matrix vector multiply */
1618   a->lvec        = 0;
1619   a->Mvctx       = 0;
1620   a->roworiented = PETSC_TRUE;
1621 
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1623                                      "MatGetDiagonalBlock_MPIDense",
1624                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1626                                      "MatMPIDenseSetPreallocation_MPIDense",
1627                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1629                                      "MatMatMult_MPIAIJ_MPIDense",
1630                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1632                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1633                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1635                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1636                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1637   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_petsc_C",
1638                                      "MatGetFactor_mpidense_petsc",
1639                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1640 #if defined(PETSC_HAVE_PLAPACK)
1641   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
1642                                      "MatGetFactor_mpidense_plapack",
1643                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1644   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1645 #endif
1646   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1647 
1648   PetscFunctionReturn(0);
1649 }
1650 EXTERN_C_END
1651 
1652 /*MC
1653    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1654 
1655    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1656    and MATMPIDENSE otherwise.
1657 
1658    Options Database Keys:
1659 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1660 
1661   Level: beginner
1662 
1663 
1664 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1665 M*/
1666 
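/*
   A brief illustrative sketch (not part of the original source): MatCreate_Dense() below
   dispatches to MATSEQDENSE or MATMPIDENSE based on the communicator size, so a typical
   way to obtain a "dense" matrix is

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);

   or, with MatSetFromOptions(), by giving -mat_type dense on the command line.
*/
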
1667 EXTERN_C_BEGIN
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatCreate_Dense"
1670 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1671 {
1672   PetscErrorCode ierr;
1673   PetscMPIInt    size;
1674 
1675   PetscFunctionBegin;
1676   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1677   if (size == 1) {
1678     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1679   } else {
1680     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1681   }
1682   PetscFunctionReturn(0);
1683 }
1684 EXTERN_C_END
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1688 /*@C
1689    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1690 
1691    Not collective
1692 
1693    Input Parameters:
1694 +  A - the matrix
1695 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1696    to control all matrix memory allocation.
1697 
1698    Notes:
1699    The dense format is fully compatible with standard Fortran 77
1700    storage by columns.
1701 
1702    The data input variable is intended primarily for Fortran programmers
1703    who wish to allocate their own matrix memory space.  Most users should
1704    set data=PETSC_NULL.
1705 
1706    Level: intermediate
1707 
1708 .keywords: matrix,dense, parallel
1709 
1710 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1711 @*/
1712 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1713 {
1714   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1715 
1716   PetscFunctionBegin;
1717   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1718   if (f) {
1719     ierr = (*f)(mat,data);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
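/*
   A hedged usage sketch (not part of the original source): preallocating an MPI dense matrix.
   Passing PETSC_NULL lets PETSc allocate the local storage; alternatively a user array of
   (local rows) x (global columns) entries, stored column-major as the Notes above indicate,
   may be supplied.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/
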
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatCreateMPIDense"
1726 /*@C
1727    MatCreateMPIDense - Creates a parallel matrix in dense format.
1728 
1729    Collective on MPI_Comm
1730 
1731    Input Parameters:
1732 +  comm - MPI communicator
1733 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1734 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1735 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1736 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1737 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1738    to control all matrix memory allocation.
1739 
1740    Output Parameter:
1741 .  A - the matrix
1742 
1743    Notes:
1744    The dense format is fully compatible with standard Fortran 77
1745    storage by columns.
1746 
1747    The data input variable is intended primarily for Fortran programmers
1748    who wish to allocate their own matrix memory space.  Most users should
1749    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1750 
1751    The user MUST specify either the local or global matrix dimensions
1752    (possibly both).
1753 
1754    Level: intermediate
1755 
1756 .keywords: matrix,dense, parallel
1757 
1758 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1759 @*/
1760 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1761 {
1762   PetscErrorCode ierr;
1763   PetscMPIInt    size;
1764 
1765   PetscFunctionBegin;
1766   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1767   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1768   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1769   if (size > 1) {
1770     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1771     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1772   } else {
1773     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1774     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778 
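/*
   A minimal sketch (not part of the original source) of the convenience routine above;
   with data = PETSC_NULL PETSc allocates the local storage, and on a one-process
   communicator the matrix is created as MATSEQDENSE instead.

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
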
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatDuplicate_MPIDense"
1781 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1782 {
1783   Mat            mat;
1784   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   *newmat       = 0;
1789   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1790   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1791   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1792   a                 = (Mat_MPIDense*)mat->data;
1793   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1794   mat->factor       = A->factor;
1795   mat->assembled    = PETSC_TRUE;
1796   mat->preallocated = PETSC_TRUE;
1797 
1798   mat->rmap->rstart     = A->rmap->rstart;
1799   mat->rmap->rend       = A->rmap->rend;
1800   a->size              = oldmat->size;
1801   a->rank              = oldmat->rank;
1802   mat->insertmode      = NOT_SET_VALUES;
1803   a->nvec              = oldmat->nvec;
1804   a->donotstash        = oldmat->donotstash;
1805 
1806   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1807   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1808   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1809 
1810   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1811   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1812   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1813 
1814   *newmat = mat;
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 #include "petscsys.h"
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1822 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1823 {
1824   PetscErrorCode ierr;
1825   PetscMPIInt    rank,size;
1826   PetscInt       *rowners,i,m,nz,j;
1827   PetscScalar    *array,*vals,*vals_ptr;
1828   MPI_Status     status;
1829 
1830   PetscFunctionBegin;
1831   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1832   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1833 
1834   /* determine ownership of all rows */
1835   m          = M/size + ((M % size) > rank);
1836   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1837   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1838   rowners[0] = 0;
1839   for (i=2; i<=size; i++) {
1840     rowners[i] += rowners[i-1];
1841   }
1842 
1843   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1844   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1845   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1846   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1847   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1848 
1849   if (!rank) {
1850     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1851 
1852     /* read in my part of the matrix numerical values  */
1853     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1854 
1855     /* insert into matrix by row (this is why we cannot read directly into the array) */
1856     vals_ptr = vals;
1857     for (i=0; i<m; i++) {
1858       for (j=0; j<N; j++) {
1859         array[i + j*m] = *vals_ptr++;
1860       }
1861     }
1862 
1863     /* read in the other processors' parts and ship them out */
1864     for (i=1; i<size; i++) {
1865       nz   = (rowners[i+1] - rowners[i])*N;
1866       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1867       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1868     }
1869   } else {
1870     /* receive numeric values */
1871     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1872 
1873     /* receive message of values */
1874     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1875 
1876     /* insert into matrix by row (this is why we cannot read directly into the array) */
1877     vals_ptr = vals;
1878     for (i=0; i<m; i++) {
1879       for (j=0; j<N; j++) {
1880         array[i + j*m] = *vals_ptr++;
1881       }
1882     }
1883   }
1884   ierr = PetscFree(rowners);CHKERRQ(ierr);
1885   ierr = PetscFree(vals);CHKERRQ(ierr);
1886   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1887   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatLoad_MPIDense"
1893 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1894 {
1895   Mat            A;
1896   PetscScalar    *vals,*svals;
1897   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1898   MPI_Status     status;
1899   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1900   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1901   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1902   PetscInt       i,nz,j,rstart,rend;
1903   int            fd;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1908   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1909   if (!rank) {
1910     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1911     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1912     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1913   }
1914 
1915   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1916   M = header[1]; N = header[2]; nz = header[3];
1917 
1918   /*
1919        Handle case where matrix is stored on disk as a dense matrix
1920   */
1921   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1922     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1923     PetscFunctionReturn(0);
1924   }
1925 
1926   /* determine ownership of all rows */
1927   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1928   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1929   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1930   rowners[0] = 0;
1931   for (i=2; i<=size; i++) {
1932     rowners[i] += rowners[i-1];
1933   }
1934   rstart = rowners[rank];
1935   rend   = rowners[rank+1];
1936 
1937   /* distribute row lengths to all processors */
1938   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1939   offlens = ourlens + (rend-rstart);
1940   if (!rank) {
1941     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1942     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1943     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1944     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1945     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1946     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1947   } else {
1948     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1949   }
1950 
1951   if (!rank) {
1952     /* calculate the number of nonzeros on each processor */
1953     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1954     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1955     for (i=0; i<size; i++) {
1956       for (j=rowners[i]; j< rowners[i+1]; j++) {
1957         procsnz[i] += rowlengths[j];
1958       }
1959     }
1960     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1961 
1962     /* determine max buffer needed and allocate it */
1963     maxnz = 0;
1964     for (i=0; i<size; i++) {
1965       maxnz = PetscMax(maxnz,procsnz[i]);
1966     }
1967     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1968 
1969     /* read in my part of the matrix column indices  */
1970     nz = procsnz[0];
1971     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1972     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1973 
1974     /* read in everyone else's part and ship it off */
1975     for (i=1; i<size; i++) {
1976       nz   = procsnz[i];
1977       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1978       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1979     }
1980     ierr = PetscFree(cols);CHKERRQ(ierr);
1981   } else {
1982     /* determine buffer space needed for message */
1983     nz = 0;
1984     for (i=0; i<m; i++) {
1985       nz += ourlens[i];
1986     }
1987     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1988 
1989     /* receive message of column indices */
1990     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1991     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1992     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1993   }
1994 
1995   /* loop over local rows, determining number of off-diagonal entries */
1996   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1997   jj = 0;
1998   for (i=0; i<m; i++) {
1999     for (j=0; j<ourlens[i]; j++) {
2000       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2001       jj++;
2002     }
2003   }
2004 
2005   /* create our matrix */
2006   for (i=0; i<m; i++) {
2007     ourlens[i] -= offlens[i];
2008   }
2009   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2010   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2011   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2012   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2013   A = *newmat;
2014   for (i=0; i<m; i++) {
2015     ourlens[i] += offlens[i];
2016   }
2017 
2018   if (!rank) {
2019     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2020 
2021     /* read in my part of the matrix numerical values  */
2022     nz = procsnz[0];
2023     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2024 
2025     /* insert into matrix */
2026     jj      = rstart;
2027     smycols = mycols;
2028     svals   = vals;
2029     for (i=0; i<m; i++) {
2030       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2031       smycols += ourlens[i];
2032       svals   += ourlens[i];
2033       jj++;
2034     }
2035 
2036     /* read in the other processors' parts and ship them out */
2037     for (i=1; i<size; i++) {
2038       nz   = procsnz[i];
2039       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2040       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2041     }
2042     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2043   } else {
2044     /* receive numeric values */
2045     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2046 
2047     /* receive message of values */
2048     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2049     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2050     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2051 
2052     /* insert into matrix */
2053     jj      = rstart;
2054     smycols = mycols;
2055     svals   = vals;
2056     for (i=0; i<m; i++) {
2057       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2058       smycols += ourlens[i];
2059       svals   += ourlens[i];
2060       jj++;
2061     }
2062   }
2063   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2064   ierr = PetscFree(vals);CHKERRQ(ierr);
2065   ierr = PetscFree(mycols);CHKERRQ(ierr);
2066   ierr = PetscFree(rowners);CHKERRQ(ierr);
2067 
2068   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatEqual_MPIDense"
2075 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2076 {
2077   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2078   Mat            a,b;
2079   PetscTruth     flg;
2080   PetscErrorCode ierr;
2081 
2082   PetscFunctionBegin;
2083   a = matA->A;
2084   b = matB->A;
2085   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2086   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #if defined(PETSC_HAVE_PLAPACK)
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2094 /*@C
2095   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2096   Level: developer
2097 
2098 .keywords: Petsc, destroy, package, PLAPACK
2099 .seealso: PetscFinalize()
2100 @*/
2101 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2102 {
2103   PetscErrorCode ierr;
2104 
2105   PetscFunctionBegin;
2106   ierr = PLA_Finalize();CHKERRQ(ierr);
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2112 /*@C
2113   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2114   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2115 
2116   Input Parameter:
2117 .   comm - the communicator the matrix lives on
2118 
2119   Level: developer
2120 
2121    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2122   same communicator (because there is global state that is initialized and used for all matrices). In addition, if
2123   PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, these
2124   subcommunicators cannot overlap.
2125 
2126 .keywords: Petsc, initialize, package, PLAPACK
2127 .seealso: PetscInitializePackage(), PetscInitialize()
2128 @*/
2129 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2130 {
2131   PetscMPIInt    size;
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   if (!PLA_Initialized(PETSC_NULL)) {
2136 
2137     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2138     Plapack_nprows = 1;
2139     Plapack_npcols = size;
2140 
2141     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2142       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2143       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2144 #if defined(PETSC_USE_DEBUG)
2145       Plapack_ierror = 3;
2146 #else
2147       Plapack_ierror = 0;
2148 #endif
2149       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2150       if (Plapack_ierror){
2151 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2152       } else {
2153 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2154       }
2155 
2156       Plapack_nb_alg = 0;
2157       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2158       if (Plapack_nb_alg) {
2159         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2160       }
2161     PetscOptionsEnd();
2162 
2163     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2164     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2165     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #endif
2171