xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision e3caeda681d93b7b1d053090fe6dee7657faa56d)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
19               matrix that represents the operator. For sequential matrices it returns itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap->N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190   const PetscInt *irow,*icol;
191   PetscScalar    *av,*bv,*v = lmat->v;
192   Mat            newmat;
193 
194   PetscFunctionBegin;
195   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
196   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
198   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
199 
200   /* No parallel redistribution currently supported! Should really check each index set
201      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
202      original matrix! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<ncols; i++) {
225     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
238   *B = newmat;
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreArray_MPIDense"
244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245 {
246   PetscFunctionBegin;
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253 {
254   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
255   MPI_Comm       comm = ((PetscObject)mat)->comm;
256   PetscErrorCode ierr;
257   PetscInt       nstash,reallocs;
258   InsertMode     addv;
259 
260   PetscFunctionBegin;
261   /* make sure all processors are either in INSERTMODE or ADDMODE */
262   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
263   if (addv == (ADD_VALUES|INSERT_VALUES)) {
264     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265   }
266   mat->insertmode = addv; /* in case this processor had no cache */
267 
268   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
269   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
270   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
279   PetscErrorCode  ierr;
280   PetscInt        i,*row,*col,flg,j,rstart,ncols;
281   PetscMPIInt     n;
282   PetscScalar     *val;
283   InsertMode      addv=mat->insertmode;
284 
285   PetscFunctionBegin;
286   /*  wait on receives */
287   while (1) {
288     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
289     if (!flg) break;
290 
291     for (i=0; i<n;) {
292       /* Now identify the consecutive vals belonging to the same row */
293       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294       if (j < n) ncols = j-i;
295       else       ncols = n-i;
296       /* Now assemble all these values with a single function call */
297       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
298       i = j;
299     }
300   }
301   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
302 
303   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
304   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
305 
306   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatZeroEntries_MPIDense"
314 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315 {
316   PetscErrorCode ierr;
317   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
318 
319   PetscFunctionBegin;
320   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 /* the code does not do the diagonal entries correctly unless the
325    matrix is square and the column and row owerships are identical.
326    This is a BUG. The only way to fix it seems to be to access
327    mdn->A and mdn->B directly and not through the MatZeroRows()
328    routine.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatZeroRows_MPIDense"
332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333 {
334   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
335   PetscErrorCode ierr;
336   PetscInt       i,*owners = A->rmap->range;
337   PetscInt       *nprocs,j,idx,nsends;
338   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
339   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340   PetscInt       *lens,*lrows,*values;
341   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
342   MPI_Comm       comm = ((PetscObject)A)->comm;
343   MPI_Request    *send_waits,*recv_waits;
344   MPI_Status     recv_status,*send_status;
345   PetscTruth     found;
346 
347   PetscFunctionBegin;
348   /*  first count number of contributors to each processor */
349   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
350   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
352   for (i=0; i<N; i++) {
353     idx = rows[i];
354     found = PETSC_FALSE;
355     for (j=0; j<size; j++) {
356       if (idx >= owners[j] && idx < owners[j+1]) {
357         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358       }
359     }
360     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361   }
362   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
363 
364   /* inform other processors of number of messages and max length*/
365   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
366 
367   /* post receives:   */
368   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
369   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
370   for (i=0; i<nrecvs; i++) {
371     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
372   }
373 
374   /* do sends:
375       1) starts[i] gives the starting index in svalues for stuff going to
376          the ith processor
377   */
378   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
379   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
380   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
381   starts[0]  = 0;
382   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383   for (i=0; i<N; i++) {
384     svalues[starts[owner[i]]++] = rows[i];
385   }
386 
387   starts[0] = 0;
388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389   count = 0;
390   for (i=0; i<size; i++) {
391     if (nprocs[2*i+1]) {
392       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
393     }
394   }
395   ierr = PetscFree(starts);CHKERRQ(ierr);
396 
397   base = owners[rank];
398 
399   /*  wait on receives */
400   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
401   source = lens + nrecvs;
402   count  = nrecvs; slen = 0;
403   while (count) {
404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
405     /* unpack receives into our local space */
406     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
407     source[imdex]  = recv_status.MPI_SOURCE;
408     lens[imdex]    = n;
409     slen += n;
410     count--;
411   }
412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
413 
414   /* move the data into the send scatter */
415   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
416   count = 0;
417   for (i=0; i<nrecvs; i++) {
418     values = rvalues + i*nmax;
419     for (j=0; j<lens[i]; j++) {
420       lrows[count++] = values[j] - base;
421     }
422   }
423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
424   ierr = PetscFree(lens);CHKERRQ(ierr);
425   ierr = PetscFree(owner);CHKERRQ(ierr);
426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
427 
428   /* actually zap the local rows */
429   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
430   ierr = PetscFree(lrows);CHKERRQ(ierr);
431 
432   /* wait on sends */
433   if (nsends) {
434     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
435     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
436     ierr = PetscFree(send_status);CHKERRQ(ierr);
437   }
438   ierr = PetscFree(send_waits);CHKERRQ(ierr);
439   ierr = PetscFree(svalues);CHKERRQ(ierr);
440 
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMult_MPIDense"
446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447 {
448   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
449   PetscErrorCode ierr;
450 
451   PetscFunctionBegin;
452   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
454   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMultAdd_MPIDense"
460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
468   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTranspose_MPIDense"
474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475 {
476   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
477   PetscErrorCode ierr;
478   PetscScalar    zero = 0.0;
479 
480   PetscFunctionBegin;
481   ierr = VecSet(yy,zero);CHKERRQ(ierr);
482   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
483   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
497   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
498   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "MatGetDiagonal_MPIDense"
505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506 {
507   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
508   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
509   PetscErrorCode ierr;
510   PetscInt       len,i,n,m = A->rmap->n,radd;
511   PetscScalar    *x,zero = 0.0;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(v,zero);CHKERRQ(ierr);
515   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
516   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
517   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
519   radd = A->rmap->rstart*m;
520   for (i=0; i<len; i++) {
521     x[i] = aloc->v[radd + i*m + i];
522   }
523   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatDestroy_MPIDense"
529 PetscErrorCode MatDestroy_MPIDense(Mat mat)
530 {
531   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
532   PetscErrorCode ierr;
533 #if defined(PETSC_HAVE_PLAPACK)
534   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
535 #endif
536 
537   PetscFunctionBegin;
538 
539 #if defined(PETSC_USE_LOG)
540   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541 #endif
542   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
543   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
544   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
545   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
546 #if defined(PETSC_HAVE_PLAPACK)
547   if (lu) {
548     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
549     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
550     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
551 
552     if (lu->is_pla) {
553       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
554       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
555       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
556     }
557   }
558 #endif
559 
560   ierr = PetscFree(mdn);CHKERRQ(ierr);
561   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_MPIDense_Binary"
572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573 {
574   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
575   PetscErrorCode    ierr;
576   PetscViewerFormat format;
577   int               fd;
578   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
579   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
580   PetscScalar       *work,*v,*vv;
581   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
582   MPI_Status        status;
583 
584   PetscFunctionBegin;
585   if (mdn->size == 1) {
586     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
587   } else {
588     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
589     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
590     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
591 
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_NATIVE) {
594 
595       if (!rank) {
596         /* store the matrix as a dense matrix */
597         header[0] = MAT_FILE_COOKIE;
598         header[1] = mat->rmap->N;
599         header[2] = N;
600         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
602 
603         /* get largest work array needed for transposing array */
604         mmax = mat->rmap->n;
605         for (i=1; i<size; i++) {
606           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607         }
608         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
609 
610         /* write out local array, by rows */
611         m    = mat->rmap->n;
612         v    = a->v;
613         for (j=0; j<N; j++) {
614           for (i=0; i<m; i++) {
615             work[j + i*N] = *v++;
616           }
617         }
618         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
619         /* get largest work array to receive messages from other processes, excludes process zero */
620         mmax = 0;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
625         for(k = 1; k < size; k++) {
626           v    = vv;
627           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
629 
630           for(j = 0; j < N; j++) {
631             for(i = 0; i < m; i++) {
632               work[j + i*N] = *v++;
633             }
634           }
635           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         }
637         ierr = PetscFree(work);CHKERRQ(ierr);
638         ierr = PetscFree(vv);CHKERRQ(ierr);
639       } else {
640         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
641       }
642     } else {
643       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE");
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652 {
653   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
654   PetscErrorCode        ierr;
655   PetscMPIInt           size = mdn->size,rank = mdn->rank;
656   const PetscViewerType vtype;
657   PetscTruth            iascii,isdraw;
658   PetscViewer           sviewer;
659   PetscViewerFormat     format;
660 #if defined(PETSC_HAVE_PLAPACK)
661   Mat_Plapack           *lu;
662 #endif
663 
664   PetscFunctionBegin;
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676 #if defined(PETSC_HAVE_PLAPACK)
677       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
681       if (mat->factor){
682         lu=(Mat_Plapack*)(mat->spptr);
683         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
684       }
685 #else
686       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
687 #endif
688       PetscFunctionReturn(0);
689     } else if (format == PETSC_VIEWER_ASCII_INFO) {
690       PetscFunctionReturn(0);
691     }
692   } else if (isdraw) {
693     PetscDraw  draw;
694     PetscTruth isnull;
695 
696     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
697     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
698     if (isnull) PetscFunctionReturn(0);
699   }
700 
701   if (size == 1) {
702     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
703   } else {
704     /* assemble the entire matrix onto first processor. */
705     Mat         A;
706     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
707     PetscInt    *cols;
708     PetscScalar *vals;
709 
710     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
713     } else {
714       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
715     }
716     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
717     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
718     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
719     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
720 
721     /* Copy the matrix ... This isn't the most efficient means,
722        but it's quick for now */
723     A->insertmode = INSERT_VALUES;
724     row = mat->rmap->rstart; m = mdn->A->rmap->n;
725     for (i=0; i<m; i++) {
726       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
727       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
728       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
729       row++;
730     }
731 
732     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
733     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
734     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     if (!rank) {
736       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
737     }
738     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
739     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
740     ierr = MatDestroy(A);CHKERRQ(ierr);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatView_MPIDense"
747 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
748 {
749   PetscErrorCode ierr;
750   PetscTruth     iascii,isbinary,isdraw,issocket;
751 
752   PetscFunctionBegin;
753 
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
755   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
756   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
757   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
758 
759   if (iascii || issocket || isdraw) {
760     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
761   } else if (isbinary) {
762     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
763   } else {
764     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "MatGetInfo_MPIDense"
771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772 {
773   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
774   Mat            mdn = mat->A;
775   PetscErrorCode ierr;
776   PetscReal      isend[5],irecv[5];
777 
778   PetscFunctionBegin;
779   info->block_size     = 1.0;
780   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
781   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;  isend[4] = info->mallocs;
783   if (flag == MAT_LOCAL) {
784     info->nz_used      = isend[0];
785     info->nz_allocated = isend[1];
786     info->nz_unneeded  = isend[2];
787     info->memory       = isend[3];
788     info->mallocs      = isend[4];
789   } else if (flag == MAT_GLOBAL_MAX) {
790     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
791     info->nz_used      = irecv[0];
792     info->nz_allocated = irecv[1];
793     info->nz_unneeded  = irecv[2];
794     info->memory       = irecv[3];
795     info->mallocs      = irecv[4];
796   } else if (flag == MAT_GLOBAL_SUM) {
797     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
798     info->nz_used      = irecv[0];
799     info->nz_allocated = irecv[1];
800     info->nz_unneeded  = irecv[2];
801     info->memory       = irecv[3];
802     info->mallocs      = irecv[4];
803   }
804   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
805   info->fill_ratio_needed = 0;
806   info->factor_mallocs    = 0;
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetOption_MPIDense"
812 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
813 {
814   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_ROW_ORIENTED:
825     a->roworiented = flg;
826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
827     break;
828   case MAT_NEW_DIAGONALS:
829   case MAT_USE_HASH_TABLE:
830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
831     break;
832   case MAT_IGNORE_OFF_PROC_ENTRIES:
833     a->donotstash = flg;
834     break;
835   case MAT_SYMMETRIC:
836   case MAT_STRUCTURALLY_SYMMETRIC:
837   case MAT_HERMITIAN:
838   case MAT_SYMMETRY_ETERNAL:
839   case MAT_IGNORE_LOWER_TRIANGULAR:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) { (*v++) *= x;}
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatNorm_MPIDense"
892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893 {
894   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
896   PetscErrorCode ierr;
897   PetscInt       i,j;
898   PetscReal      sum = 0.0;
899   PetscScalar    *v = mat->v;
900 
901   PetscFunctionBegin;
902   if (mdn->size == 1) {
903     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
904   } else {
905     if (type == NORM_FROBENIUS) {
906       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
907 #if defined(PETSC_USE_COMPLEX)
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909 #else
910         sum += (*v)*(*v); v++;
911 #endif
912       }
913       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
914       *nrm = sqrt(*nrm);
915       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
916     } else if (type == NORM_1) {
917       PetscReal *tmp,*tmp2;
918       ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
919       tmp2 = tmp + A->cmap->N;
920       ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
921       *nrm = 0.0;
922       v = mat->v;
923       for (j=0; j<mdn->A->cmap->n; j++) {
924         for (i=0; i<mdn->A->rmap->n; i++) {
925           tmp[j] += PetscAbsScalar(*v);  v++;
926         }
927       }
928       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       for (j=0; j<A->cmap->N; j++) {
930         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931       }
932       ierr = PetscFree(tmp);CHKERRQ(ierr);
933       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
934     } else if (type == NORM_INFINITY) { /* max row norm */
935       PetscReal ntemp;
936       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
937       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
938     } else {
939       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_MPIDense"
947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948 {
949   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951   Mat            B;
952   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
953   PetscErrorCode ierr;
954   PetscInt       j,i;
955   PetscScalar    *v;
956 
957   PetscFunctionBegin;
958   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
959   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
960     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
961     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
963     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
964   } else {
965     B = *matout;
966   }
967 
968   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
969   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
970   for (i=0; i<m; i++) rwork[i] = rstart + i;
971   for (j=0; j<n; j++) {
972     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
973     v   += m;
974   }
975   ierr = PetscFree(rwork);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
979     *matout = B;
980   } else {
981     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 #include "petscblaslapack.h"
987 #undef __FUNCT__
988 #define __FUNCT__ "MatScale_MPIDense"
989 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990 {
991   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
992   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
993   PetscScalar    oalpha = alpha;
994   PetscErrorCode ierr;
995   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
996 
997   PetscFunctionBegin;
998   BLASscal_(&nz,&oalpha,a->v,&one);
999   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1007 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008 {
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #if defined(PETSC_HAVE_PLAPACK)
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1020 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1021 {
1022   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1023   PetscErrorCode ierr;
1024   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1025   PetscScalar    *array;
1026   PetscReal      one = 1.0;
1027 
1028   PetscFunctionBegin;
1029   /* Copy A into F->lu->A */
1030   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1031   ierr = PLA_API_begin();CHKERRQ(ierr);
1032   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1034   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1035   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1036   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1037   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1038   ierr = PLA_API_end();CHKERRQ(ierr);
1039   lu->rstart = rstart;
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1045 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1046 {
1047   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1048   PetscErrorCode ierr;
1049   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1050   PetscScalar    *array;
1051   PetscReal      one = 1.0;
1052 
1053   PetscFunctionBegin;
1054   /* Copy F into A->lu->A */
1055   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1056   ierr = PLA_API_begin();CHKERRQ(ierr);
1057   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1058   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1059   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1060   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1061   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1062   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1063   ierr = PLA_API_end();CHKERRQ(ierr);
1064   lu->rstart = rstart;
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1070 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1071 {
1072   PetscErrorCode ierr;
1073   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1074   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1075   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1076   PLA_Obj        alpha = NULL,beta = NULL;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1080   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1081 
1082   /*
1083   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1084   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1085   */
1086 
1087   /* do the multiply in PLA  */
1088   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1089   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1090   CHKMEMQ;
1091 
1092   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1093   CHKMEMQ;
1094   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1095   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1096 
1097   /*
1098   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1099   */
1100   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1106 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1107 {
1108   PetscErrorCode ierr;
1109   PetscInt       m=A->rmap->n,n=B->cmap->n;
1110   Mat            Cmat;
1111 
1112   PetscFunctionBegin;
1113   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1114   SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported");
1115   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1116   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1117   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1118   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1120 
1121   *C = Cmat;
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1127 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1128 {
1129   PetscErrorCode ierr;
1130 
1131   PetscFunctionBegin;
1132   if (scall == MAT_INITIAL_MATRIX){
1133     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1134   }
1135   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSolve_MPIDense"
1141 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1142 {
1143   MPI_Comm       comm = ((PetscObject)A)->comm;
1144   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1145   PetscErrorCode ierr;
1146   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1147   PetscScalar    *array;
1148   PetscReal      one = 1.0;
1149   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;;
1150   PLA_Obj        v_pla = NULL;
1151   PetscScalar    *loc_buf;
1152   Vec            loc_x;
1153 
1154   PetscFunctionBegin;
1155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1157 
1158   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1159   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1160   PLA_Obj_set_to_zero(v_pla);
1161 
1162   /* Copy b into rhs_pla */
1163   PLA_API_begin();
1164   PLA_Obj_API_open(v_pla);
1165   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1166   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1167   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1168   PLA_Obj_API_close(v_pla);
1169   PLA_API_end();
1170 
1171   if (A->factor == MAT_FACTOR_LU){
1172     /* Apply the permutations to the right hand sides */
1173     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1174 
1175     /* Solve L y = b, overwriting b with y */
1176     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1177 
1178     /* Solve U x = y (=b), overwriting b with x */
1179     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1180   } else { /* MAT_FACTOR_CHOLESKY */
1181     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1182     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1183                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1184   }
1185 
1186   /* Copy PLAPACK x into Petsc vector x  */
1187   PLA_Obj_local_length(v_pla, &loc_m);
1188   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1189   PLA_Obj_local_stride(v_pla, &loc_stride);
1190   /*
1191     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1192   */
1193   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1194   if (!lu->pla_solved){
1195 
1196     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1197     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1198 
1199     /* Create IS and cts for VecScatterring */
1200     PLA_Obj_local_length(v_pla, &loc_m);
1201     PLA_Obj_local_stride(v_pla, &loc_stride);
1202     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1203     idx_petsc = idx_pla + loc_m;
1204 
1205     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1206     for (i=0; i<loc_m; i+=lu->nb){
1207       j = 0;
1208       while (j < lu->nb && i+j < loc_m){
1209         idx_petsc[i+j] = rstart + j; j++;
1210       }
1211       rstart += size*lu->nb;
1212     }
1213 
1214     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1215 
1216     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1217     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1218     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1219     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1220   }
1221   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224   /* Free data */
1225   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1226   PLA_Obj_free(&v_pla);
1227 
1228   lu->pla_solved = PETSC_TRUE;
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1234 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1235 {
1236   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1237   PetscErrorCode ierr;
1238   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1239   PetscInt       info_pla=0;
1240   PetscScalar    *array,one = 1.0;
1241 
1242   PetscFunctionBegin;
1243   if (lu->mstruct == SAME_NONZERO_PATTERN){
1244     PLA_Obj_free(&lu->A);
1245     PLA_Obj_free (&lu->pivots);
1246   }
1247   /* Create PLAPACK matrix object */
1248   lu->A = NULL; lu->pivots = NULL;
1249   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1250   PLA_Obj_set_to_zero(lu->A);
1251   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1252 
1253   /* Copy A into lu->A */
1254   PLA_API_begin();
1255   PLA_Obj_API_open(lu->A);
1256   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1257   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1258   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1259   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1260   PLA_Obj_API_close(lu->A);
1261   PLA_API_end();
1262 
1263   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1264   info_pla = PLA_LU(lu->A,lu->pivots);
1265   if (info_pla != 0)
1266     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1267 
1268   lu->rstart         = rstart;
1269   lu->mstruct        = SAME_NONZERO_PATTERN;
1270   F->ops->solve      = MatSolve_MPIDense;
1271   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1277 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1278 {
1279   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1280   PetscErrorCode ierr;
1281   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1282   PetscInt       info_pla=0;
1283   PetscScalar    *array,one = 1.0;
1284 
1285   PetscFunctionBegin;
1286   if (lu->mstruct == SAME_NONZERO_PATTERN){
1287     PLA_Obj_free(&lu->A);
1288   }
1289   /* Create PLAPACK matrix object */
1290   lu->A      = NULL;
1291   lu->pivots = NULL;
1292   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1293 
1294   /* Copy A into lu->A */
1295   PLA_API_begin();
1296   PLA_Obj_API_open(lu->A);
1297   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1298   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1299   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1300   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1301   PLA_Obj_API_close(lu->A);
1302   PLA_API_end();
1303 
1304   /* Factor P A -> Chol */
1305   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1306   if (info_pla != 0)
1307     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1308 
1309   lu->rstart         = rstart;
1310   lu->mstruct        = SAME_NONZERO_PATTERN;
1311   F->ops->solve      = MatSolve_MPIDense;
1312   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 /* Note the Petsc perm permutation is ignored */
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1319 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1320 {
1321   PetscErrorCode ierr;
1322   PetscTruth     issymmetric,set;
1323 
1324   PetscFunctionBegin;
1325   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1326   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1327   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 /* Note the Petsc r and c permutations are ignored */
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1334 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1335 {
1336   PetscErrorCode ierr;
1337   PetscInt       M = A->rmap->N;
1338   Mat_Plapack    *lu;
1339 
1340   PetscFunctionBegin;
1341   lu = (Mat_Plapack*)F->spptr;
1342   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1343   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 EXTERN_C_BEGIN
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1350 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1351 {
1352   PetscFunctionBegin;
1353   *type = MAT_SOLVER_PLAPACK;
1354   PetscFunctionReturn(0);
1355 }
1356 EXTERN_C_END
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1360 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1361 {
1362   PetscErrorCode ierr;
1363   Mat_Plapack    *lu;
1364   PetscMPIInt    size;
1365   PetscInt       M=A->rmap->N;
1366 
1367   PetscFunctionBegin;
1368   /* Create the factorization matrix */
1369   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1370   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1371   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1372   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1373   (*F)->spptr = (void*)lu;
1374 
1375   /* Set default Plapack parameters */
1376   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1377   lu->nb = M/size;
1378   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1379 
1380   /* Set runtime options */
1381   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1382     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1383   PetscOptionsEnd();
1384 
1385   /* Create object distribution template */
1386   lu->templ = NULL;
1387   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1388 
1389   /* Set the datatype */
1390 #if defined(PETSC_USE_COMPLEX)
1391   lu->datatype = MPI_DOUBLE_COMPLEX;
1392 #else
1393   lu->datatype = MPI_DOUBLE;
1394 #endif
1395 
1396   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1397 
1398 
1399   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() is called yet */
1400   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1401 
1402   if (ftype == MAT_FACTOR_LU) {
1403     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1404   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1405     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1406   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1407   (*F)->factor = ftype;
1408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 #endif
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatAXPY_MPIDense"
1415 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1416 {
1417   PetscErrorCode ierr;
1418   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1419 
1420   PetscFunctionBegin;
1421   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 /* -------------------------------------------------------------------*/
1426 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1427        MatGetRow_MPIDense,
1428        MatRestoreRow_MPIDense,
1429        MatMult_MPIDense,
1430 /* 4*/ MatMultAdd_MPIDense,
1431        MatMultTranspose_MPIDense,
1432        MatMultTransposeAdd_MPIDense,
1433        0,
1434        0,
1435        0,
1436 /*10*/ 0,
1437        0,
1438        0,
1439        0,
1440        MatTranspose_MPIDense,
1441 /*15*/ MatGetInfo_MPIDense,
1442        MatEqual_MPIDense,
1443        MatGetDiagonal_MPIDense,
1444        MatDiagonalScale_MPIDense,
1445        MatNorm_MPIDense,
1446 /*20*/ MatAssemblyBegin_MPIDense,
1447        MatAssemblyEnd_MPIDense,
1448        MatSetOption_MPIDense,
1449        MatZeroEntries_MPIDense,
1450 /*24*/ MatZeroRows_MPIDense,
1451        0,
1452        0,
1453        0,
1454        0,
1455 /*29*/ MatSetUpPreallocation_MPIDense,
1456        0,
1457        0,
1458        MatGetArray_MPIDense,
1459        MatRestoreArray_MPIDense,
1460 /*34*/ MatDuplicate_MPIDense,
1461        0,
1462        0,
1463        0,
1464        0,
1465 /*39*/ MatAXPY_MPIDense,
1466        MatGetSubMatrices_MPIDense,
1467        0,
1468        MatGetValues_MPIDense,
1469        0,
1470 /*44*/ 0,
1471        MatScale_MPIDense,
1472        0,
1473        0,
1474        0,
1475 /*49*/ 0,
1476        0,
1477        0,
1478        0,
1479        0,
1480 /*54*/ 0,
1481        0,
1482        0,
1483        0,
1484        0,
1485 /*59*/ MatGetSubMatrix_MPIDense,
1486        MatDestroy_MPIDense,
1487        MatView_MPIDense,
1488        0,
1489        0,
1490 /*64*/ 0,
1491        0,
1492        0,
1493        0,
1494        0,
1495 /*69*/ 0,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*74*/ 0,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*79*/ 0,
1506        0,
1507        0,
1508        0,
1509 /*83*/ MatLoad_MPIDense,
1510        0,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*89*/
1516 #if defined(PETSC_HAVE_PLAPACK)
1517        MatMatMult_MPIDense_MPIDense,
1518        MatMatMultSymbolic_MPIDense_MPIDense,
1519        MatMatMultNumeric_MPIDense_MPIDense,
1520 #else
1521        0,
1522        0,
1523        0,
1524 #endif
1525        0,
1526 /*94*/ 0,
1527        0,
1528        0,
1529        0};
1530 
1531 EXTERN_C_BEGIN
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1534 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1535 {
1536   Mat_MPIDense   *a;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   mat->preallocated = PETSC_TRUE;
1541   /* Note:  For now, when data is specified above, this assumes the user correctly
1542    allocates the local dense storage space.  We should add error checking. */
1543 
1544   a    = (Mat_MPIDense*)mat->data;
1545   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1546   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1547   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1548   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1549   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 EXTERN_C_END
1553 
1554 /*MC
1555    MAT_SOLVER_PLAPACK = "mpidense" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1556 
1557   run config/configure.py with the option --download-plapack
1558 
1559 
1560   Options Database Keys:
1561 . -mat_plapack_nprows <n> - number of rows in processor partition
1562 . -mat_plapack_npcols <n> - number of columns in processor partition
1563 . -mat_plapack_nb <n> - block size of template vector
1564 . -mat_plapack_nb_alg <n> - algorithmic block size
1565 - -mat_plapack_ckerror <n> - error checking flag
1566 
1567 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1568 
1569 M*/
1570 
1571 EXTERN_C_BEGIN
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatCreate_MPIDense"
1574 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1575 {
1576   Mat_MPIDense   *a;
1577   PetscErrorCode ierr;
1578 
1579   PetscFunctionBegin;
1580   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1581   mat->data         = (void*)a;
1582   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1583   mat->mapping      = 0;
1584 
1585   mat->insertmode = NOT_SET_VALUES;
1586   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1587   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1588 
1589   ierr = PetscMapSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1590   ierr = PetscMapSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1591   ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
1592   ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
1593   a->nvec = mat->cmap->n;
1594 
1595   /* build cache for off array entries formed */
1596   a->donotstash = PETSC_FALSE;
1597   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1598 
1599   /* stuff used for matrix vector multiply */
1600   a->lvec        = 0;
1601   a->Mvctx       = 0;
1602   a->roworiented = PETSC_TRUE;
1603 
1604   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1605                                      "MatGetDiagonalBlock_MPIDense",
1606                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1607   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1608                                      "MatMPIDenseSetPreallocation_MPIDense",
1609                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1610   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1611                                      "MatMatMult_MPIAIJ_MPIDense",
1612                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1613   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1614                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1615                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1616   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1617                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1618                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1619 #if defined(PETSC_HAVE_PLAPACK)
1620   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_plapack_C",
1621                                      "MatGetFactor_mpidense_plapack",
1622                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1623   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1624 #endif
1625   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1626 
1627   PetscFunctionReturn(0);
1628 }
1629 EXTERN_C_END
1630 
1631 /*MC
1632    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1633 
1634    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1635    and MATMPIDENSE otherwise.
1636 
1637    Options Database Keys:
1638 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1639 
1640   Level: beginner
1641 
1642 
1643 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1644 M*/
1645 
1646 EXTERN_C_BEGIN
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatCreate_Dense"
1649 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1650 {
1651   PetscErrorCode ierr;
1652   PetscMPIInt    size;
1653 
1654   PetscFunctionBegin;
1655   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1656   if (size == 1) {
1657     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1658   } else {
1659     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1660   }
1661   PetscFunctionReturn(0);
1662 }
1663 EXTERN_C_END
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1667 /*@C
1668    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1669 
1670    Not collective
1671 
1672    Input Parameters:
1673 .  A - the matrix
1674 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1675    to control all matrix memory allocation.
1676 
1677    Notes:
1678    The dense format is fully compatible with standard Fortran 77
1679    storage by columns.
1680 
1681    The data input variable is intended primarily for Fortran programmers
1682    who wish to allocate their own matrix memory space.  Most users should
1683    set data=PETSC_NULL.
1684 
1685    Level: intermediate
1686 
1687 .keywords: matrix,dense, parallel
1688 
1689 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1690 @*/
1691 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1692 {
1693   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1697   if (f) {
1698     ierr = (*f)(mat,data);CHKERRQ(ierr);
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatCreateMPIDense"
1705 /*@C
1706    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1707 
1708    Collective on MPI_Comm
1709 
1710    Input Parameters:
1711 +  comm - MPI communicator
1712 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1713 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1714 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1715 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1716 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1717    to control all matrix memory allocation.
1718 
1719    Output Parameter:
1720 .  A - the matrix
1721 
1722    Notes:
1723    The dense format is fully compatible with standard Fortran 77
1724    storage by columns.
1725 
1726    The data input variable is intended primarily for Fortran programmers
1727    who wish to allocate their own matrix memory space.  Most users should
1728    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1729 
1730    The user MUST specify either the local or global matrix dimensions
1731    (possibly both).
1732 
1733    Level: intermediate
1734 
1735 .keywords: matrix,dense, parallel
1736 
1737 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1738 @*/
1739 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1740 {
1741   PetscErrorCode ierr;
1742   PetscMPIInt    size;
1743 
1744   PetscFunctionBegin;
1745   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1746   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1747   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1748   if (size > 1) {
1749     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1750     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1751   } else {
1752     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1753     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1754   }
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatDuplicate_MPIDense"
1760 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1761 {
1762   Mat            mat;
1763   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   *newmat       = 0;
1768   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1769   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1770   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1771   a                 = (Mat_MPIDense*)mat->data;
1772   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1773   mat->factor       = A->factor;
1774   mat->assembled    = PETSC_TRUE;
1775   mat->preallocated = PETSC_TRUE;
1776 
1777   mat->rmap->rstart     = A->rmap->rstart;
1778   mat->rmap->rend       = A->rmap->rend;
1779   a->size              = oldmat->size;
1780   a->rank              = oldmat->rank;
1781   mat->insertmode      = NOT_SET_VALUES;
1782   a->nvec              = oldmat->nvec;
1783   a->donotstash        = oldmat->donotstash;
1784 
1785   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1786   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1787   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1788 
1789   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1790   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1791   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1792 
1793   *newmat = mat;
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #include "petscsys.h"
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1801 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1802 {
1803   PetscErrorCode ierr;
1804   PetscMPIInt    rank,size;
1805   PetscInt       *rowners,i,m,nz,j;
1806   PetscScalar    *array,*vals,*vals_ptr;
1807   MPI_Status     status;
1808 
1809   PetscFunctionBegin;
1810   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1812 
1813   /* determine ownership of all rows */
1814   m          = M/size + ((M % size) > rank);
1815   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1816   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1817   rowners[0] = 0;
1818   for (i=2; i<=size; i++) {
1819     rowners[i] += rowners[i-1];
1820   }
1821 
1822   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1823   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1824   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1825   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1826   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1827 
1828   if (!rank) {
1829     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1830 
1831     /* read in my part of the matrix numerical values  */
1832     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1833 
1834     /* insert into matrix-by row (this is why cannot directly read into array */
1835     vals_ptr = vals;
1836     for (i=0; i<m; i++) {
1837       for (j=0; j<N; j++) {
1838         array[i + j*m] = *vals_ptr++;
1839       }
1840     }
1841 
1842     /* read in other processors and ship out */
1843     for (i=1; i<size; i++) {
1844       nz   = (rowners[i+1] - rowners[i])*N;
1845       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1846       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1847     }
1848   } else {
1849     /* receive numeric values */
1850     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1851 
1852     /* receive message of values*/
1853     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1854 
1855     /* insert into matrix-by row (this is why cannot directly read into array */
1856     vals_ptr = vals;
1857     for (i=0; i<m; i++) {
1858       for (j=0; j<N; j++) {
1859         array[i + j*m] = *vals_ptr++;
1860       }
1861     }
1862   }
1863   ierr = PetscFree(rowners);CHKERRQ(ierr);
1864   ierr = PetscFree(vals);CHKERRQ(ierr);
1865   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1866   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatLoad_MPIDense"
1872 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1873 {
1874   Mat            A;
1875   PetscScalar    *vals,*svals;
1876   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1877   MPI_Status     status;
1878   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1879   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1880   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1881   PetscInt       i,nz,j,rstart,rend;
1882   int            fd;
1883   PetscErrorCode ierr;
1884 
1885   PetscFunctionBegin;
1886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1888   if (!rank) {
1889     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1890     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1891     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1892   }
1893 
1894   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1895   M = header[1]; N = header[2]; nz = header[3];
1896 
1897   /*
1898        Handle case where matrix is stored on disk as a dense matrix
1899   */
1900   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1901     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1902     PetscFunctionReturn(0);
1903   }
1904 
1905   /* determine ownership of all rows */
1906   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1907   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1908   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1909   rowners[0] = 0;
1910   for (i=2; i<=size; i++) {
1911     rowners[i] += rowners[i-1];
1912   }
1913   rstart = rowners[rank];
1914   rend   = rowners[rank+1];
1915 
1916   /* distribute row lengths to all processors */
1917   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1918   offlens = ourlens + (rend-rstart);
1919   if (!rank) {
1920     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1921     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1922     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1923     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1924     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1925     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1926   } else {
1927     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1928   }
1929 
1930   if (!rank) {
1931     /* calculate the number of nonzeros on each processor */
1932     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1933     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1934     for (i=0; i<size; i++) {
1935       for (j=rowners[i]; j< rowners[i+1]; j++) {
1936         procsnz[i] += rowlengths[j];
1937       }
1938     }
1939     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1940 
1941     /* determine max buffer needed and allocate it */
1942     maxnz = 0;
1943     for (i=0; i<size; i++) {
1944       maxnz = PetscMax(maxnz,procsnz[i]);
1945     }
1946     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1947 
1948     /* read in my part of the matrix column indices  */
1949     nz = procsnz[0];
1950     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1951     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1952 
1953     /* read in every one elses and ship off */
1954     for (i=1; i<size; i++) {
1955       nz   = procsnz[i];
1956       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1957       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1958     }
1959     ierr = PetscFree(cols);CHKERRQ(ierr);
1960   } else {
1961     /* determine buffer space needed for message */
1962     nz = 0;
1963     for (i=0; i<m; i++) {
1964       nz += ourlens[i];
1965     }
1966     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1967 
1968     /* receive message of column indices*/
1969     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1970     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1971     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1972   }
1973 
1974   /* loop over local rows, determining number of off diagonal entries */
1975   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1976   jj = 0;
1977   for (i=0; i<m; i++) {
1978     for (j=0; j<ourlens[i]; j++) {
1979       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1980       jj++;
1981     }
1982   }
1983 
1984   /* create our matrix */
1985   for (i=0; i<m; i++) {
1986     ourlens[i] -= offlens[i];
1987   }
1988   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1989   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1990   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1991   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1992   A = *newmat;
1993   for (i=0; i<m; i++) {
1994     ourlens[i] += offlens[i];
1995   }
1996 
1997   if (!rank) {
1998     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1999 
2000     /* read in my part of the matrix numerical values  */
2001     nz = procsnz[0];
2002     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2003 
2004     /* insert into matrix */
2005     jj      = rstart;
2006     smycols = mycols;
2007     svals   = vals;
2008     for (i=0; i<m; i++) {
2009       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2010       smycols += ourlens[i];
2011       svals   += ourlens[i];
2012       jj++;
2013     }
2014 
2015     /* read in other processors and ship out */
2016     for (i=1; i<size; i++) {
2017       nz   = procsnz[i];
2018       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2019       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2020     }
2021     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2022   } else {
2023     /* receive numeric values */
2024     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2025 
2026     /* receive message of values*/
2027     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2028     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2029     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2030 
2031     /* insert into matrix */
2032     jj      = rstart;
2033     smycols = mycols;
2034     svals   = vals;
2035     for (i=0; i<m; i++) {
2036       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2037       smycols += ourlens[i];
2038       svals   += ourlens[i];
2039       jj++;
2040     }
2041   }
2042   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2043   ierr = PetscFree(vals);CHKERRQ(ierr);
2044   ierr = PetscFree(mycols);CHKERRQ(ierr);
2045   ierr = PetscFree(rowners);CHKERRQ(ierr);
2046 
2047   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2048   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatEqual_MPIDense"
2054 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2055 {
2056   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2057   Mat            a,b;
2058   PetscTruth     flg;
2059   PetscErrorCode ierr;
2060 
2061   PetscFunctionBegin;
2062   a = matA->A;
2063   b = matB->A;
2064   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2065   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #if defined(PETSC_HAVE_PLAPACK)
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2073 /*@C
2074   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2075   Level: developer
2076 
2077 .keywords: Petsc, destroy, package, PLAPACK
2078 .seealso: PetscFinalize()
2079 @*/
2080 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2081 {
2082   PetscErrorCode ierr;
2083 
2084   PetscFunctionBegin;
2085   ierr = PLA_Finalize();CHKERRQ(ierr);
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2091 /*@C
2092   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2093   called from MatCreate_MPIDense() the first time an MPI dense matrix is called.
2094 
2095   Input Parameter:
2096 .   comm - the communicator the matrix lives on
2097 
2098   Level: developer
2099 
2100    Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
2101   same communicator (because there is some global state that is initialized and used for all matrices). In addition if
2102   PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators
2103   cannot overlap.
2104 
2105 .keywords: Petsc, initialize, package, PLAPACK
2106 .seealso: PetscInitializePackage(), PetscInitialize()
2107 @*/
2108 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2109 {
2110   PetscMPIInt    size;
2111   PetscErrorCode ierr;
2112 
2113   PetscFunctionBegin;
2114   if (!PLA_Initialized(PETSC_NULL)) {
2115 
2116     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2117     Plapack_nprows = 1;
2118     Plapack_npcols = size;
2119 
2120     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2121       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2122       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2123 #if defined(PETSC_USE_DEBUG)
2124       Plapack_ierror = 3;
2125 #else
2126       Plapack_ierror = 0;
2127 #endif
2128       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2129       if (Plapack_ierror){
2130 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2131       } else {
2132 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2133       }
2134 
2135       Plapack_nb_alg = 0;
2136       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2137       if (Plapack_nb_alg) {
2138         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2139       }
2140     PetscOptionsEnd();
2141 
2142     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2143     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2144     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2145   }
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 #endif
2150