xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision dbf0e21dc9b62c19f2224f20451fa79066c7e006)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel (MPI) dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
19               matrix that represents the operator; a MATSEQDENSE matrix simply returns itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
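
/*
   A minimal usage sketch for MatDenseGetLocalMatrix(), assuming A is an assembled
   MATMPIDENSE matrix; the returned B is the process-local SeqDense block, not a copy,
   so the caller must not destroy it:

     Mat         B;
     PetscScalar *array;

     ierr = MatDenseGetLocalMatrix(A,&B);CHKERRQ(ierr);
     ierr = MatGetArray(B,&array);CHKERRQ(ierr);
     ... operate on the local column-major array ...
     ierr = MatRestoreArray(B,&array);CHKERRQ(ierr);
*/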
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
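
/*
   A minimal usage sketch of the MatGetDiagonalBlock() interface that dispatches to the
   routine above (the public calling sequence is assumed to mirror the one here); it
   returns the square diagonal block owned by this process:

     PetscTruth iscopy;
     Mat        D;

     ierr = MatGetDiagonalBlock(A,&iscopy,MAT_INITIAL_MATRIX,&D);CHKERRQ(ierr);
     if (iscopy) {ierr = MatDestroy(D);CHKERRQ(ierr);}
*/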
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
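
/*
   A minimal usage sketch, assuming mat is a preallocated MATMPIDENSE matrix; entries are
   addressed by global indices, and rows owned by another process are routed through the
   stash and delivered during assembly:

     PetscInt    row = 0,col = 0;        (hypothetical global indices)
     PetscScalar value = 1.0;

     ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/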
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap->N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190   const PetscInt *irow,*icol;
191   PetscScalar    *av,*bv,*v = lmat->v;
192   Mat            newmat;
193 
194   PetscFunctionBegin;
195   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
196   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
198   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
199 
200   /* No parallel redistribution is currently supported! We should really check each index set
201      to confirm that it is OK.  Currently only a submatrix with the same partitioning as the
202      original matrix is supported! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<ncols; i++) {
225     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
238   *B = newmat;
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreArray_MPIDense"
244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245 {
246   PetscFunctionBegin;
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253 {
254   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
255   MPI_Comm       comm = ((PetscObject)mat)->comm;
256   PetscErrorCode ierr;
257   PetscInt       nstash,reallocs;
258   InsertMode     addv;
259 
260   PetscFunctionBegin;
261   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
262   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
263   if (addv == (ADD_VALUES|INSERT_VALUES)) {
264     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265   }
266   mat->insertmode = addv; /* in case this processor had no cache */
267 
268   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
269   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
270   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
279   PetscErrorCode  ierr;
280   PetscInt        i,*row,*col,flg,j,rstart,ncols;
281   PetscMPIInt     n;
282   PetscScalar     *val;
283   InsertMode      addv=mat->insertmode;
284 
285   PetscFunctionBegin;
286   /*  wait on receives */
287   while (1) {
288     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
289     if (!flg) break;
290 
291     for (i=0; i<n;) {
292       /* Now identify the consecutive vals belonging to the same row */
293       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294       if (j < n) ncols = j-i;
295       else       ncols = n-i;
296       /* Now assemble all these values with a single function call */
297       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
298       i = j;
299     }
300   }
301   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
302 
303   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
304   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
305 
306   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatZeroEntries_MPIDense"
314 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315 {
316   PetscErrorCode ierr;
317   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
318 
319   PetscFunctionBegin;
320   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 /* the code does not do the diagonal entries correctly unless the
325    matrix is square and the column and row ownerships are identical.
326    This is a BUG. The only way to fix it seems to be to access
327    mdn->A and mdn->B directly and not through the MatZeroRows()
328    routine.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatZeroRows_MPIDense"
332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333 {
334   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
335   PetscErrorCode ierr;
336   PetscInt       i,*owners = A->rmap->range;
337   PetscInt       *nprocs,j,idx,nsends;
338   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
339   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340   PetscInt       *lens,*lrows,*values;
341   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
342   MPI_Comm       comm = ((PetscObject)A)->comm;
343   MPI_Request    *send_waits,*recv_waits;
344   MPI_Status     recv_status,*send_status;
345   PetscTruth     found;
346 
347   PetscFunctionBegin;
348   /*  first count number of contributors to each processor */
349   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
350   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
352   for (i=0; i<N; i++) {
353     idx = rows[i];
354     found = PETSC_FALSE;
355     for (j=0; j<size; j++) {
356       if (idx >= owners[j] && idx < owners[j+1]) {
357         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358       }
359     }
360     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361   }
362   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
363 
364   /* inform other processors of number of messages and max length*/
365   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
366 
367   /* post receives:   */
368   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
369   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
370   for (i=0; i<nrecvs; i++) {
371     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
372   }
373 
374   /* do sends:
375       1) starts[i] gives the starting index in svalues for stuff going to
376          the ith processor
377   */
378   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
379   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
380   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
381   starts[0]  = 0;
382   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383   for (i=0; i<N; i++) {
384     svalues[starts[owner[i]]++] = rows[i];
385   }
386 
387   starts[0] = 0;
388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389   count = 0;
390   for (i=0; i<size; i++) {
391     if (nprocs[2*i+1]) {
392       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
393     }
394   }
395   ierr = PetscFree(starts);CHKERRQ(ierr);
396 
397   base = owners[rank];
398 
399   /*  wait on receives */
400   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
401   source = lens + nrecvs;
402   count  = nrecvs; slen = 0;
403   while (count) {
404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
405     /* unpack receives into our local space */
406     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
407     source[imdex]  = recv_status.MPI_SOURCE;
408     lens[imdex]    = n;
409     slen += n;
410     count--;
411   }
412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
413 
414   /* move the data into the send scatter */
415   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
416   count = 0;
417   for (i=0; i<nrecvs; i++) {
418     values = rvalues + i*nmax;
419     for (j=0; j<lens[i]; j++) {
420       lrows[count++] = values[j] - base;
421     }
422   }
423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
424   ierr = PetscFree(lens);CHKERRQ(ierr);
425   ierr = PetscFree(owner);CHKERRQ(ierr);
426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
427 
428   /* actually zap the local rows */
429   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
430   ierr = PetscFree(lrows);CHKERRQ(ierr);
431 
432   /* wait on sends */
433   if (nsends) {
434     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
435     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
436     ierr = PetscFree(send_status);CHKERRQ(ierr);
437   }
438   ierr = PetscFree(send_waits);CHKERRQ(ierr);
439   ierr = PetscFree(svalues);CHKERRQ(ierr);
440 
441   PetscFunctionReturn(0);
442 }
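
/*
   A minimal usage sketch, assuming A is an assembled MATMPIDENSE matrix; rows are given by
   global index and may be owned by other processes, and (per the note above) the diagonal
   entry is handled reliably only for square matrices with matching row and column ownership:

     PetscInt rows[1];

     rows[0] = 0;                         (hypothetical global row to zero)
     ierr = MatZeroRows(A,1,rows,1.0);CHKERRQ(ierr);
*/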
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMult_MPIDense"
446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447 {
448   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
449   PetscErrorCode ierr;
450 
451   PetscFunctionBegin;
452   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
454   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMultAdd_MPIDense"
460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
468   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTranspose_MPIDense"
474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475 {
476   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
477   PetscErrorCode ierr;
478   PetscScalar    zero = 0.0;
479 
480   PetscFunctionBegin;
481   ierr = VecSet(yy,zero);CHKERRQ(ierr);
482   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
483   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
497   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
498   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "MatGetDiagonal_MPIDense"
505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506 {
507   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
508   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
509   PetscErrorCode ierr;
510   PetscInt       len,i,n,m = A->rmap->n,radd;
511   PetscScalar    *x,zero = 0.0;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(v,zero);CHKERRQ(ierr);
515   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
516   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
517   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
519   radd = A->rmap->rstart*m;
520   for (i=0; i<len; i++) {
521     x[i] = aloc->v[radd + i*m + i];
522   }
523   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
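
/*
   A minimal usage sketch: the diagonal is returned in a vector with the matrix's row
   layout; MatGetVecs() is used here to obtain a conforming vector:

     Vec d;

     ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(d);CHKERRQ(ierr);
*/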
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatDestroy_MPIDense"
529 PetscErrorCode MatDestroy_MPIDense(Mat mat)
530 {
531   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
532   PetscErrorCode ierr;
533 #if defined(PETSC_HAVE_PLAPACK)
534   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
535 #endif
536 
537   PetscFunctionBegin;
538 
539 #if defined(PETSC_USE_LOG)
540   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541 #endif
542   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
543   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
544   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
545   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
546 #if defined(PETSC_HAVE_PLAPACK)
547   if (lu) {
548     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
549     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
550     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
551 
552     if (lu->is_pla) {
553       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
554       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
555       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
556     }
557   }
558 #endif
559 
560   ierr = PetscFree(mdn);CHKERRQ(ierr);
561   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_MPIDense_Binary"
572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573 {
574   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
575   PetscErrorCode    ierr;
576   PetscViewerFormat format;
577   int               fd;
578   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
579   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
580   PetscScalar       *work,*v,*vv;
581   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
582   MPI_Status        status;
583 
584   PetscFunctionBegin;
585   if (mdn->size == 1) {
586     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
587   } else {
588     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
589     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
590     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
591 
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_NATIVE) {
594 
595       if (!rank) {
596         /* store the matrix as a dense matrix */
597         header[0] = MAT_FILE_COOKIE;
598         header[1] = mat->rmap->N;
599         header[2] = N;
600         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
602 
603         /* get largest work array needed for transposing array */
604         mmax = mat->rmap->n;
605         for (i=1; i<size; i++) {
606           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607         }
608         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
609 
610         /* write out local array, by rows */
611         m    = mat->rmap->n;
612         v    = a->v;
613         for (j=0; j<N; j++) {
614           for (i=0; i<m; i++) {
615             work[j + i*N] = *v++;
616           }
617         }
618         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
619         /* get largest work array to receive messages from other processes, excludes process zero */
620         mmax = 0;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
625         for(k = 1; k < size; k++) {
626           v    = vv;
627           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
629 
630           for(j = 0; j < N; j++) {
631             for(i = 0; i < m; i++) {
632               work[j + i*N] = *v++;
633             }
634           }
635           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         }
637         ierr = PetscFree(work);CHKERRQ(ierr);
638         ierr = PetscFree(vv);CHKERRQ(ierr);
639       } else {
640         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
641       }
642     } else {
643       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652 {
653   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
654   PetscErrorCode        ierr;
655   PetscMPIInt           size = mdn->size,rank = mdn->rank;
656   const PetscViewerType vtype;
657   PetscTruth            iascii,isdraw;
658   PetscViewer           sviewer;
659   PetscViewerFormat     format;
660 #if defined(PETSC_HAVE_PLAPACK)
661   Mat_Plapack           *lu;
662 #endif
663 
664   PetscFunctionBegin;
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676 #if defined(PETSC_HAVE_PLAPACK)
677       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
681       if (mat->factor){
682         lu=(Mat_Plapack*)(mat->spptr);
683         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
684       }
685 #else
686       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
687 #endif
688       PetscFunctionReturn(0);
689     } else if (format == PETSC_VIEWER_ASCII_INFO) {
690       PetscFunctionReturn(0);
691     }
692   } else if (isdraw) {
693     PetscDraw  draw;
694     PetscTruth isnull;
695 
696     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
697     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
698     if (isnull) PetscFunctionReturn(0);
699   }
700 
701   if (size == 1) {
702     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
703   } else {
704     /* assemble the entire matrix onto first processor. */
705     Mat         A;
706     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
707     PetscInt    *cols;
708     PetscScalar *vals;
709 
710     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
713     } else {
714       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
715     }
716     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)mat)->type_name here is probably acceptable. */
717     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
718     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
719     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
720 
721     /* Copy the matrix ... This isn't the most efficient means,
722        but it's quick for now */
723     A->insertmode = INSERT_VALUES;
724     row = mat->rmap->rstart; m = mdn->A->rmap->n;
725     for (i=0; i<m; i++) {
726       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
727       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
728       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
729       row++;
730     }
731 
732     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
733     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
734     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     if (!rank) {
736       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
737     }
738     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
739     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
740     ierr = MatDestroy(A);CHKERRQ(ierr);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatView_MPIDense"
747 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
748 {
749   PetscErrorCode ierr;
750   PetscTruth     iascii,isbinary,isdraw,issocket;
751 
752   PetscFunctionBegin;
753 
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
755   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
756   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
757   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
758 
759   if (iascii || issocket || isdraw) {
760     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
761   } else if (isbinary) {
762     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
763   } else {
764     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "MatGetInfo_MPIDense"
771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772 {
773   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
774   Mat            mdn = mat->A;
775   PetscErrorCode ierr;
776   PetscReal      isend[5],irecv[5];
777 
778   PetscFunctionBegin;
779   info->block_size     = 1.0;
780   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
781   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;  isend[4] = info->mallocs;
783   if (flag == MAT_LOCAL) {
784     info->nz_used      = isend[0];
785     info->nz_allocated = isend[1];
786     info->nz_unneeded  = isend[2];
787     info->memory       = isend[3];
788     info->mallocs      = isend[4];
789   } else if (flag == MAT_GLOBAL_MAX) {
790     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
791     info->nz_used      = irecv[0];
792     info->nz_allocated = irecv[1];
793     info->nz_unneeded  = irecv[2];
794     info->memory       = irecv[3];
795     info->mallocs      = irecv[4];
796   } else if (flag == MAT_GLOBAL_SUM) {
797     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
798     info->nz_used      = irecv[0];
799     info->nz_allocated = irecv[1];
800     info->nz_unneeded  = irecv[2];
801     info->memory       = irecv[3];
802     info->mallocs      = irecv[4];
803   }
804   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
805   info->fill_ratio_needed = 0;
806   info->factor_mallocs    = 0;
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetOption_MPIDense"
812 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
813 {
814   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_ROW_ORIENTED:
825     a->roworiented = flg;
826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
827     break;
828   case MAT_NEW_DIAGONALS:
829   case MAT_USE_HASH_TABLE:
830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
831     break;
832   case MAT_IGNORE_OFF_PROC_ENTRIES:
833     a->donotstash = flg;
834     break;
835   case MAT_SYMMETRIC:
836   case MAT_STRUCTURALLY_SYMMETRIC:
837   case MAT_HERMITIAN:
838   case MAT_SYMMETRY_ETERNAL:
839   case MAT_IGNORE_LOWER_TRIANGULAR:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) { (*v++) *= x;}
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatNorm_MPIDense"
892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893 {
894   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
896   PetscErrorCode ierr;
897   PetscInt       i,j;
898   PetscReal      sum = 0.0;
899   PetscScalar    *v = mat->v;
900 
901   PetscFunctionBegin;
902   if (mdn->size == 1) {
903     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
904   } else {
905     if (type == NORM_FROBENIUS) {
906       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
907 #if defined(PETSC_USE_COMPLEX)
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909 #else
910         sum += (*v)*(*v); v++;
911 #endif
912       }
913       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
914       *nrm = sqrt(*nrm);
915       ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
916     } else if (type == NORM_1) {
917       PetscReal *tmp,*tmp2;
918       ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
919       tmp2 = tmp + A->cmap->N;
920       ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
921       *nrm = 0.0;
922       v = mat->v;
923       for (j=0; j<mdn->A->cmap->n; j++) {
924         for (i=0; i<mdn->A->rmap->n; i++) {
925           tmp[j] += PetscAbsScalar(*v);  v++;
926         }
927       }
928       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       for (j=0; j<A->cmap->N; j++) {
930         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931       }
932       ierr = PetscFree(tmp);CHKERRQ(ierr);
933       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
934     } else if (type == NORM_INFINITY) { /* max row norm */
935       PetscReal ntemp;
936       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
937       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
938     } else {
939       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_MPIDense"
947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948 {
949   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951   Mat            B;
952   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
953   PetscErrorCode ierr;
954   PetscInt       j,i;
955   PetscScalar    *v;
956 
957   PetscFunctionBegin;
958   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
959   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
960     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
961     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
963     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
964   } else {
965     B = *matout;
966   }
967 
968   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
969   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
970   for (i=0; i<m; i++) rwork[i] = rstart + i;
971   for (j=0; j<n; j++) {
972     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
973     v   += m;
974   }
975   ierr = PetscFree(rwork);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
979     *matout = B;
980   } else {
981     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 #include "petscblaslapack.h"
987 #undef __FUNCT__
988 #define __FUNCT__ "MatScale_MPIDense"
989 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990 {
991   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
992   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
993   PetscScalar    oalpha = alpha;
994   PetscErrorCode ierr;
995   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
996 
997   PetscFunctionBegin;
998   BLASscal_(&nz,&oalpha,a->v,&one);
999   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1007 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008 {
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #if defined(PETSC_HAVE_PLAPACK)
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1020 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1021 {
1022   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1023   PetscErrorCode ierr;
1024   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1025   PetscScalar    *array;
1026   PetscReal      one = 1.0;
1027 
1028   PetscFunctionBegin;
1029   /* Copy A into F->lu->A */
1030   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1031   ierr = PLA_API_begin();CHKERRQ(ierr);
1032   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1034   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1035   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1036   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1037   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1038   ierr = PLA_API_end();CHKERRQ(ierr);
1039   lu->rstart = rstart;
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1045 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1046 {
1047   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1048   PetscErrorCode ierr;
1049   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1050   PetscScalar    *array;
1051   PetscReal      one = 1.0;
1052 
1053   PetscFunctionBegin;
1054   /* Copy the PLAPACK matrix lu->A (stored in F) back into A */
1055   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1056   ierr = PLA_API_begin();CHKERRQ(ierr);
1057   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1058   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1059   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1060   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1061   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1062   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1063   ierr = PLA_API_end();CHKERRQ(ierr);
1064   lu->rstart = rstart;
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1070 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1071 {
1072   PetscErrorCode ierr;
1073   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1074   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1075   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1076   PLA_Obj        alpha = NULL,beta = NULL;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1080   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1081 
1082   /*
1083   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1084   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1085   */
1086 
1087   /* do the multiply in PLA  */
1088   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1089   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1090   CHKMEMQ;
1091 
1092   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1093   CHKMEMQ;
1094   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1095   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1096 
1097   /*
1098   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1099   */
1100   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1106 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1107 {
1108   PetscErrorCode ierr;
1109   PetscInt       m=A->rmap->n,n=B->cmap->n;
1110   Mat            Cmat;
1111 
1112   PetscFunctionBegin;
1113   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1114   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1115   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1116   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1117   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1118   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1120 
1121   *C = Cmat;
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1127 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1128 {
1129   PetscErrorCode ierr;
1130 
1131   PetscFunctionBegin;
1132   if (scall == MAT_INITIAL_MATRIX){
1133     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1134   }
1135   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSolve_MPIDense"
1141 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1142 {
1143   MPI_Comm       comm = ((PetscObject)A)->comm;
1144   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1145   PetscErrorCode ierr;
1146   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1147   PetscScalar    *array;
1148   PetscReal      one = 1.0;
1149   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1150   PLA_Obj        v_pla = NULL;
1151   PetscScalar    *loc_buf;
1152   Vec            loc_x;
1153 
1154   PetscFunctionBegin;
1155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1157 
1158   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1159   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1160   PLA_Obj_set_to_zero(v_pla);
1161 
1162   /* Copy b into v_pla */
1163   PLA_API_begin();
1164   PLA_Obj_API_open(v_pla);
1165   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1166   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1167   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1168   PLA_Obj_API_close(v_pla);
1169   PLA_API_end();
1170 
1171   if (A->factor == MAT_FACTOR_LU){
1172     /* Apply the permutations to the right hand sides */
1173     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1174 
1175     /* Solve L y = b, overwriting b with y */
1176     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1177 
1178     /* Solve U x = y (=b), overwriting b with x */
1179     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1180   } else { /* MAT_FACTOR_CHOLESKY */
1181     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1182     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1183                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1184   }
1185 
1186   /* Copy PLAPACK x into Petsc vector x  */
1187   PLA_Obj_local_length(v_pla, &loc_m);
1188   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1189   PLA_Obj_local_stride(v_pla, &loc_stride);
1190   /*
1191     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1192   */
1193   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1194   if (!lu->pla_solved){
1195 
1196     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1197     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1198 
1199     /* Create the index sets and scatter context for the VecScatter */
1200     PLA_Obj_local_length(v_pla, &loc_m);
1201     PLA_Obj_local_stride(v_pla, &loc_stride);
1202     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1203     idx_petsc = idx_pla + loc_m;
1204 
1205     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1206     for (i=0; i<loc_m; i+=lu->nb){
1207       j = 0;
1208       while (j < lu->nb && i+j < loc_m){
1209         idx_petsc[i+j] = rstart + j; j++;
1210       }
1211       rstart += size*lu->nb;
1212     }
1213 
1214     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1215 
1216     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1217     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1218     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1219     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1220   }
1221   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224   /* Free data */
1225   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1226   PLA_Obj_free(&v_pla);
1227 
1228   lu->pla_solved = PETSC_TRUE;
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1234 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1235 {
1236   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1237   PetscErrorCode ierr;
1238   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1239   PetscInt       info_pla=0;
1240   PetscScalar    *array,one = 1.0;
1241 
1242   PetscFunctionBegin;
1243   if (lu->mstruct == SAME_NONZERO_PATTERN){
1244     PLA_Obj_free(&lu->A);
1245     PLA_Obj_free (&lu->pivots);
1246   }
1247   /* Create PLAPACK matrix object */
1248   lu->A = NULL; lu->pivots = NULL;
1249   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1250   PLA_Obj_set_to_zero(lu->A);
1251   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1252 
1253   /* Copy A into lu->A */
1254   PLA_API_begin();
1255   PLA_Obj_API_open(lu->A);
1256   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1257   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1258   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1259   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1260   PLA_Obj_API_close(lu->A);
1261   PLA_API_end();
1262 
1263   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1264   info_pla = PLA_LU(lu->A,lu->pivots);
1265   if (info_pla != 0)
1266     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1267 
1268   lu->rstart         = rstart;
1269   lu->mstruct        = SAME_NONZERO_PATTERN;
1270   F->ops->solve      = MatSolve_MPIDense;
1271   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1277 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1278 {
1279   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1280   PetscErrorCode ierr;
1281   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1282   PetscInt       info_pla=0;
1283   PetscScalar    *array,one = 1.0;
1284 
1285   PetscFunctionBegin;
1286   if (lu->mstruct == SAME_NONZERO_PATTERN){
1287     PLA_Obj_free(&lu->A);
1288   }
1289   /* Create PLAPACK matrix object */
1290   lu->A      = NULL;
1291   lu->pivots = NULL;
1292   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1293 
1294   /* Copy A into lu->A */
1295   PLA_API_begin();
1296   PLA_Obj_API_open(lu->A);
1297   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1298   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1299   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1300   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1301   PLA_Obj_API_close(lu->A);
1302   PLA_API_end();
1303 
1304   /* Factor P A -> Chol */
1305   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1306   if (info_pla != 0)
1307     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1308 
1309   lu->rstart         = rstart;
1310   lu->mstruct        = SAME_NONZERO_PATTERN;
1311   F->ops->solve      = MatSolve_MPIDense;
1312   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 /* Note the Petsc perm permutation is ignored */
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1319 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1320 {
1321   PetscErrorCode ierr;
1322   PetscTruth     issymmetric,set;
1323 
1324   PetscFunctionBegin;
1325   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1326   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1327   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 /* Note the Petsc r and c permutations are ignored */
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1334 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1335 {
1336   PetscErrorCode ierr;
1337   PetscInt       M = A->rmap->N;
1338   Mat_Plapack    *lu;
1339 
1340   PetscFunctionBegin;
1341   lu = (Mat_Plapack*)F->spptr;
1342   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1343   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 EXTERN_C_BEGIN
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1350 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1351 {
1352   PetscFunctionBegin;
1353   *type = MAT_SOLVER_PLAPACK;
1354   PetscFunctionReturn(0);
1355 }
1356 EXTERN_C_END
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1360 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1361 {
1362   PetscErrorCode ierr;
1363   Mat_Plapack    *lu;
1364   PetscMPIInt    size;
1365   PetscInt       M=A->rmap->N;
1366 
1367   PetscFunctionBegin;
1368   /* Create the factorization matrix */
1369   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1370   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1371   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1372   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1373   (*F)->spptr = (void*)lu;
1374 
1375   /* Set default Plapack parameters */
1376   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1377   lu->nb = M/size;
1378   if (M - lu->nb*size) lu->nb++; /* round up so each process owns a single block (no cyclic distribution) */
1379 
1380   /* Set runtime options */
1381   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1382     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1383   PetscOptionsEnd();
1384 
1385   /* Create object distribution template */
1386   lu->templ = NULL;
1387   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1388 
1389   /* Set the datatype */
1390 #if defined(PETSC_USE_COMPLEX)
1391   lu->datatype = MPI_DOUBLE_COMPLEX;
1392 #else
1393   lu->datatype = MPI_DOUBLE;
1394 #endif
1395 
1396   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1397 
1398 
1399   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1400   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1401 
1402   if (ftype == MAT_FACTOR_LU) {
1403     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1404   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1405     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1406   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1407   (*F)->factor = ftype;
1408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 #endif
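
/*
   A minimal usage sketch of the PLAPACK factorization path registered above (available only
   when PETSc is configured with PLAPACK); assumes A is a square, assembled MATMPIDENSE matrix
   and b, x are conforming parallel vectors. Passing PETSC_NULL for the row and column orderings
   is an assumption here, relying on the fact that the symbolic routines above ignore them:

     Mat           F;
     MatFactorInfo info;

     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(F);CHKERRQ(ierr);
*/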
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatAXPY_MPIDense"
1415 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1416 {
1417   PetscErrorCode ierr;
1418   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1419 
1420   PetscFunctionBegin;
1421   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 /* -------------------------------------------------------------------*/
1426 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1427        MatGetRow_MPIDense,
1428        MatRestoreRow_MPIDense,
1429        MatMult_MPIDense,
1430 /* 4*/ MatMultAdd_MPIDense,
1431        MatMultTranspose_MPIDense,
1432        MatMultTransposeAdd_MPIDense,
1433        0,
1434        0,
1435        0,
1436 /*10*/ 0,
1437        0,
1438        0,
1439        0,
1440        MatTranspose_MPIDense,
1441 /*15*/ MatGetInfo_MPIDense,
1442        MatEqual_MPIDense,
1443        MatGetDiagonal_MPIDense,
1444        MatDiagonalScale_MPIDense,
1445        MatNorm_MPIDense,
1446 /*20*/ MatAssemblyBegin_MPIDense,
1447        MatAssemblyEnd_MPIDense,
1448        0,
1449        MatSetOption_MPIDense,
1450        MatZeroEntries_MPIDense,
1451 /*25*/ MatZeroRows_MPIDense,
1452        0,
1453        0,
1454        0,
1455        0,
1456 /*30*/ MatSetUpPreallocation_MPIDense,
1457        0,
1458        0,
1459        MatGetArray_MPIDense,
1460        MatRestoreArray_MPIDense,
1461 /*35*/ MatDuplicate_MPIDense,
1462        0,
1463        0,
1464        0,
1465        0,
1466 /*40*/ MatAXPY_MPIDense,
1467        MatGetSubMatrices_MPIDense,
1468        0,
1469        MatGetValues_MPIDense,
1470        0,
1471 /*45*/ 0,
1472        MatScale_MPIDense,
1473        0,
1474        0,
1475        0,
1476 /*50*/ 0,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*55*/ 0,
1482        0,
1483        0,
1484        0,
1485        0,
1486 /*60*/ MatGetSubMatrix_MPIDense,
1487        MatDestroy_MPIDense,
1488        MatView_MPIDense,
1489        0,
1490        0,
1491 /*65*/ 0,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*70*/ 0,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*75*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*80*/ 0,
1507        0,
1508        0,
1509        0,
1510 /*84*/ MatLoad_MPIDense,
1511        0,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*90*/
1517 #if defined(PETSC_HAVE_PLAPACK)
1518        MatMatMult_MPIDense_MPIDense,
1519        MatMatMultSymbolic_MPIDense_MPIDense,
1520        MatMatMultNumeric_MPIDense_MPIDense,
1521 #else
1522        0,
1523        0,
1524        0,
1525 #endif
1526        0,
1527 /*95*/ 0,
1528        0,
1529        0,
1530        0};
1531 
1532 EXTERN_C_BEGIN
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1535 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1536 {
1537   Mat_MPIDense   *a;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   mat->preallocated = PETSC_TRUE;
1542   /* Note: for now, when data is provided, this assumes that the user has correctly
1543      allocated the local dense storage space.  We should add error checking. */
1544 
1545   a    = (Mat_MPIDense*)mat->data;
1546   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1547   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1548   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1549   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1550   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1551   PetscFunctionReturn(0);
1552 }
1553 EXTERN_C_END
1554 
1555 /*MC
1556    MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1557 
1558   Run config/configure.py with the option --download-plapack to install PETSc with PLAPACK support; an illustrative usage sketch follows this block.
1559 
1560 
1561   Options Database Keys:
1562 + -mat_plapack_nprows <n> - number of rows in processor partition
1563 . -mat_plapack_npcols <n> - number of columns in processor partition
1564 . -mat_plapack_nb <n> - block size of template vector
1565 . -mat_plapack_nb_alg <n> - algorithmic block size
1566 - -mat_plapack_ckerror <n> - error checking level (0 disables PLAPACK error checking)
1567 
1568 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1569 
1570 M*/
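
/*
   Illustrative usage sketch for MAT_SOLVER_PLAPACK.  Assumptions: a hypothetical executable ./ex
   that solves a linear system whose operator is MATMPIDENSE, and that the runtime option for
   selecting the factorization package in this release is -pc_factor_mat_solver_package:

       mpiexec -n 4 ./ex -ksp_type preonly -pc_type lu \
               -pc_factor_mat_solver_package plapack -mat_plapack_nb 64 -mat_plapack_nb_alg 128

   The same factorization can also be selected from code through the PC factor interface
   referenced in the .seealso list above.
*/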
1571 
1572 EXTERN_C_BEGIN
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatCreate_MPIDense"
1575 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1576 {
1577   Mat_MPIDense   *a;
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1582   mat->data         = (void*)a;
1583   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1584   mat->mapping      = 0;
1585 
1586   mat->insertmode = NOT_SET_VALUES;
1587   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1588   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1589 
1590   ierr = PetscMapSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1591   ierr = PetscMapSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1592   ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
1593   ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
1594   a->nvec = mat->cmap->n;
1595 
1596   /* build cache for off array entries formed */
1597   a->donotstash = PETSC_FALSE;
1598   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1599 
1600   /* stuff used for matrix vector multiply */
1601   a->lvec        = 0;
1602   a->Mvctx       = 0;
1603   a->roworiented = PETSC_TRUE;
1604 
1605   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1606                                      "MatGetDiagonalBlock_MPIDense",
1607                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1608   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1609                                      "MatMPIDenseSetPreallocation_MPIDense",
1610                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1611   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1612                                      "MatMatMult_MPIAIJ_MPIDense",
1613                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1614   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1615                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1616                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1617   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1618                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1619                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1620 #if defined(PETSC_HAVE_PLAPACK)
1621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
1622                                      "MatGetFactor_mpidense_plapack",
1623                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1624   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1625 #endif
1626   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1627 
1628   PetscFunctionReturn(0);
1629 }
1630 EXTERN_C_END
1631 
1632 /*MC
1633    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1634 
1635    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1636    and MATMPIDENSE otherwise.
1637 
1638    Options Database Keys:
1639 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1640 
1641   Level: beginner
1642 
1643 
1644 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1645 M*/
1646 
1647 EXTERN_C_BEGIN
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatCreate_Dense"
1650 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1651 {
1652   PetscErrorCode ierr;
1653   PetscMPIInt    size;
1654 
1655   PetscFunctionBegin;
1656   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1657   if (size == 1) {
1658     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1659   } else {
1660     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1661   }
1662   PetscFunctionReturn(0);
1663 }
1664 EXTERN_C_END
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1668 /*@C
1669    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1670 
1671    Not collective
1672 
1673    Input Parameters:
1674 +  A - the matrix
1675 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1676    to control all matrix memory allocation.
1677 
1678    Notes:
1679    The dense format is fully compatible with standard Fortran 77
1680    storage by columns.
1681 
1682    The data input variable is intended primarily for Fortran programmers
1683    who wish to allocate their own matrix memory space.  Most users should
1684    set data=PETSC_NULL.
1685 
1686    Level: intermediate
1687 
1688 .keywords: matrix,dense, parallel
1689 
1690 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1691 @*/
1692 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1693 {
1694   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1698   if (f) {
1699     ierr = (*f)(mat,data);CHKERRQ(ierr);
1700   }
1701   PetscFunctionReturn(0);
1702 }
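
/*
   Illustrative sketch (hypothetical helper, not part of the library source): supplying user-owned
   storage for the local block.  As MatMPIDenseSetPreallocation_MPIDense() above suggests, the local
   block is (local rows) x (global columns) and is stored column-major, so userarray is assumed to
   hold m*N entries and to outlive the matrix; PETSc will not free it.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDenseWithUserArraySketch"
PetscErrorCode MatCreateMPIDenseWithUserArraySketch(MPI_Comm comm,PetscInt m,PetscInt N,PetscScalar *userarray,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(*A,userarray);CHKERRQ(ierr); /* hand the column-major local array to the matrix */
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* mark the matrix as ready to use */
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}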
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatCreateMPIDense"
1706 /*@C
1707    MatCreateMPIDense - Creates a parallel matrix in dense format.
1708 
1709    Collective on MPI_Comm
1710 
1711    Input Parameters:
1712 +  comm - MPI communicator
1713 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1714 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1715 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1716 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1717 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1718    to control all matrix memory allocation.
1719 
1720    Output Parameter:
1721 .  A - the matrix
1722 
1723    Notes:
1724    The dense format is fully compatible with standard Fortran 77
1725    storage by columns.
1726 
1727    The data input variable is intended primarily for Fortran programmers
1728    who wish to allocate their own matrix memory space.  Most users should
1729    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1730 
1731    The user MUST specify either the local or global matrix dimensions
1732    (possibly both).
1733 
1734    Level: intermediate
1735 
1736 .keywords: matrix,dense, parallel
1737 
1738 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1739 @*/
1740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1741 {
1742   PetscErrorCode ierr;
1743   PetscMPIInt    size;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1747   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1749   if (size > 1) {
1750     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1751     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1752   } else {
1753     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1754     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1755   }
1756   PetscFunctionReturn(0);
1757 }
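
/*
   Illustrative sketch (hypothetical helper): the typical create/fill/assemble cycle for the routine
   documented above, letting PETSc pick the row distribution and manage the storage (data = PETSC_NULL).
   The inserted values are arbitrary and only show that each process sets its own rows.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIDenseFillSketch"
PetscErrorCode MatCreateMPIDenseFillSketch(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend;
  PetscScalar    v;

  PetscFunctionBegin;
  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {          /* each process sets only its own rows */
    for (j=0; j<N; j++) {
      v    = (PetscScalar)(i + j);
      ierr = MatSetValues(*A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}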
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatDuplicate_MPIDense"
1761 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1762 {
1763   Mat            mat;
1764   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   *newmat       = 0;
1769   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1770   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1771   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1772   a                 = (Mat_MPIDense*)mat->data;
1773   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1774   mat->factor       = A->factor;
1775   mat->assembled    = PETSC_TRUE;
1776   mat->preallocated = PETSC_TRUE;
1777 
1778   mat->rmap->rstart     = A->rmap->rstart;
1779   mat->rmap->rend       = A->rmap->rend;
1780   a->size              = oldmat->size;
1781   a->rank              = oldmat->rank;
1782   mat->insertmode      = NOT_SET_VALUES;
1783   a->nvec              = oldmat->nvec;
1784   a->donotstash        = oldmat->donotstash;
1785 
1786   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1787   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1788   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1789 
1790   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1791   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1792   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1793 
1794   *newmat = mat;
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 #include "petscsys.h"
1799 
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1802 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1803 {
1804   PetscErrorCode ierr;
1805   PetscMPIInt    rank,size;
1806   PetscInt       *rowners,i,m,nz,j;
1807   PetscScalar    *array,*vals,*vals_ptr;
1808   MPI_Status     status;
1809 
1810   PetscFunctionBegin;
1811   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1812   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1813 
1814   /* determine ownership of all rows: each process gets M/size rows and the first M%size processes get one extra */
1815   m          = M/size + ((M % size) > rank);
1816   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1817   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1818   rowners[0] = 0;
1819   for (i=2; i<=size; i++) {
1820     rowners[i] += rowners[i-1];
1821   }
1822 
1823   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1824   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1825   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1826   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1827   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1828 
1829   if (!rank) {
1830     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1831 
1832     /* read in my part of the matrix numerical values  */
1833     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1834 
1835     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1836     vals_ptr = vals;
1837     for (i=0; i<m; i++) {
1838       for (j=0; j<N; j++) {
1839         array[i + j*m] = *vals_ptr++;
1840       }
1841     }
1842 
1843     /* read in the other processors' parts and ship them out */
1844     for (i=1; i<size; i++) {
1845       nz   = (rowners[i+1] - rowners[i])*N;
1846       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1847       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1848     }
1849   } else {
1850     /* receive numeric values */
1851     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1852 
1853     /* receive message of values */
1854     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1855 
1856     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1857     vals_ptr = vals;
1858     for (i=0; i<m; i++) {
1859       for (j=0; j<N; j++) {
1860         array[i + j*m] = *vals_ptr++;
1861       }
1862     }
1863   }
1864   ierr = PetscFree(rowners);CHKERRQ(ierr);
1865   ierr = PetscFree(vals);CHKERRQ(ierr);
1866   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1867   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatLoad_MPIDense"
1873 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1874 {
1875   Mat            A;
1876   PetscScalar    *vals,*svals;
1877   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1878   MPI_Status     status;
1879   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1880   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1881   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1882   PetscInt       i,nz,j,rstart,rend;
1883   int            fd;
1884   PetscErrorCode ierr;
1885 
1886   PetscFunctionBegin;
1887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1889   if (!rank) {
1890     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1891     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1892     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1893   }
1894 
1895   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1896   M = header[1]; N = header[2]; nz = header[3];
1897 
1898   /*
1899        Handle case where matrix is stored on disk as a dense matrix
1900   */
1901   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1902     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1903     PetscFunctionReturn(0);
1904   }
1905 
1906   /* determine ownership of all rows */
1907   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1908   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1909   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1910   rowners[0] = 0;
1911   for (i=2; i<=size; i++) {
1912     rowners[i] += rowners[i-1];
1913   }
1914   rstart = rowners[rank];
1915   rend   = rowners[rank+1];
1916 
1917   /* distribute row lengths to all processors */
1918   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1919   offlens = ourlens + (rend-rstart);
1920   if (!rank) {
1921     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1922     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1923     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1924     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1925     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1926     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1927   } else {
1928     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1929   }
1930 
1931   if (!rank) {
1932     /* calculate the number of nonzeros on each processor */
1933     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1934     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1935     for (i=0; i<size; i++) {
1936       for (j=rowners[i]; j< rowners[i+1]; j++) {
1937         procsnz[i] += rowlengths[j];
1938       }
1939     }
1940     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1941 
1942     /* determine max buffer needed and allocate it */
1943     maxnz = 0;
1944     for (i=0; i<size; i++) {
1945       maxnz = PetscMax(maxnz,procsnz[i]);
1946     }
1947     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1948 
1949     /* read in my part of the matrix column indices  */
1950     nz = procsnz[0];
1951     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1952     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1953 
1954     /* read in everyone else's parts and ship them off */
1955     for (i=1; i<size; i++) {
1956       nz   = procsnz[i];
1957       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1958       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1959     }
1960     ierr = PetscFree(cols);CHKERRQ(ierr);
1961   } else {
1962     /* determine buffer space needed for message */
1963     nz = 0;
1964     for (i=0; i<m; i++) {
1965       nz += ourlens[i];
1966     }
1967     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1968 
1969     /* receive message of column indices */
1970     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1971     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1972     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1973   }
1974 
1975   /* loop over local rows, determining number of off diagonal entries */
1976   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1977   jj = 0;
1978   for (i=0; i<m; i++) {
1979     for (j=0; j<ourlens[i]; j++) {
1980       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1981       jj++;
1982     }
1983   }
1984 
1985   /* create our matrix */
1986   for (i=0; i<m; i++) {
1987     ourlens[i] -= offlens[i];
1988   }
1989   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1990   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1991   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1992   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1993   A = *newmat;
1994   for (i=0; i<m; i++) {
1995     ourlens[i] += offlens[i];
1996   }
1997 
1998   if (!rank) {
1999     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2000 
2001     /* read in my part of the matrix numerical values  */
2002     nz = procsnz[0];
2003     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2004 
2005     /* insert into matrix */
2006     jj      = rstart;
2007     smycols = mycols;
2008     svals   = vals;
2009     for (i=0; i<m; i++) {
2010       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2011       smycols += ourlens[i];
2012       svals   += ourlens[i];
2013       jj++;
2014     }
2015 
2016     /* read in the other processors' parts and ship them out */
2017     for (i=1; i<size; i++) {
2018       nz   = procsnz[i];
2019       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2020       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2021     }
2022     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2023   } else {
2024     /* receive numeric values */
2025     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2026 
2027     /* receive message of values */
2028     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2029     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2030     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2031 
2032     /* insert into matrix */
2033     jj      = rstart;
2034     smycols = mycols;
2035     svals   = vals;
2036     for (i=0; i<m; i++) {
2037       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2038       smycols += ourlens[i];
2039       svals   += ourlens[i];
2040       jj++;
2041     }
2042   }
2043   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2044   ierr = PetscFree(vals);CHKERRQ(ierr);
2045   ierr = PetscFree(mycols);CHKERRQ(ierr);
2046   ierr = PetscFree(rowners);CHKERRQ(ierr);
2047 
2048   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2049   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
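
/*
   Illustrative sketch (hypothetical file name): reading a matrix from a PETSc binary viewer as a
   MATMPIDENSE matrix, which ends up in MatLoad_MPIDense() above (or in
   MatLoad_MPIDense_DenseInFile() when the file stores the matrix in dense format).
*/
#undef __FUNCT__
#define __FUNCT__ "MatLoadMPIDenseSketch"
PetscErrorCode MatLoadMPIDenseSketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIDENSE,A);CHKERRQ(ierr);  /* creates and fills *A from the file */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}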
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatEqual_MPIDense"
2055 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2056 {
2057   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2058   Mat            a,b;
2059   PetscTruth     flg;
2060   PetscErrorCode ierr;
2061 
2062   PetscFunctionBegin;
2063   a = matA->A;
2064   b = matB->A;
2065   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2066   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #if defined(PETSC_HAVE_PLAPACK)
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2074 /*@C
2075   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2076   Level: developer
2077 
2078 .keywords: Petsc, destroy, package, PLAPACK
2079 .seealso: PetscFinalize()
2080 @*/
2081 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2082 {
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   ierr = PLA_Finalize();CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2092 /*@C
2093   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2094   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2095 
2096   Input Parameter:
2097 .   comm - the communicator the matrix lives on
2098 
2099   Level: developer
2100 
2101    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2102   same communicator (because some global state is initialized and used for all matrices). In addition, if
2103   PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, those
2104   subcommunicators cannot overlap.
2105 
2106 .keywords: Petsc, initialize, package, PLAPACK
2107 .seealso: PetscInitializePackage(), PetscInitialize()
2108 @*/
2109 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2110 {
2111   PetscMPIInt    size;
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115   if (!PLA_Initialized(PETSC_NULL)) {
2116 
2117     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2118     Plapack_nprows = 1;
2119     Plapack_npcols = size;
2120 
2121     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2122       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2123       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2124 #if defined(PETSC_USE_DEBUG)
2125       Plapack_ierror = 3;
2126 #else
2127       Plapack_ierror = 0;
2128 #endif
2129       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2130       if (Plapack_ierror){
2131 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2132       } else {
2133 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2134       }
2135 
2136       Plapack_nb_alg = 0;
2137       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2138       if (Plapack_nb_alg) {
2139         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2140       }
2141     PetscOptionsEnd();
2142 
2143     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2144     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2145     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #endif
2151