xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cb2480ea1771985ba1730f45b3bb74758db5966a)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
19               matrix that stores the locally owned part of the operator; for a sequential matrix it returns the matrix itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
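/* A minimal usage sketch (hypothetical caller, not part of this file): fetch the
   process-local MATSEQDENSE block.  No new reference is taken, so the caller must
   not destroy the returned matrix. */
#if 0
static PetscErrorCode ExampleInspectLocalBlock(Mat A)
{
  Mat            Aloc;
  PetscInt       m,n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr); /* Aloc aliases A's local storage */
  ierr = MatGetSize(Aloc,&m,&n);CHKERRQ(ierr);          /* local rows by global columns */
  ierr = PetscPrintf(PETSC_COMM_SELF,"local block is %D x %D\n",m,n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif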
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse == MAT_REUSE_MATRIX) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
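/* Note on the aliasing above: the local storage of mdn->A is an m-by-N column-major
   array with leading dimension m, so columns rstart..rstart+m-1 (the diagonal block for
   this process) are contiguous beginning at offset m*rstart.  For example, with m = 2
   local rows and rstart = 2, column 2 starts at entry 2*2 = 4 of the array. */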
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
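/* A minimal usage sketch (hypothetical caller): locally owned rows go straight into the
   inner MATSEQDENSE; rows owned elsewhere are cached in the stash until assembly. */
#if 0
static PetscErrorCode ExampleSetBlock(Mat mat)
{
  PetscInt       rows[2] = {0,1},cols[2] = {0,1};
  PetscScalar    vals[4] = {1.0,2.0,3.0,4.0}; /* row-oriented: a00,a01,a10,a11 */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* ships stashed entries */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif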
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap.N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
190   PetscScalar    *av,*bv,*v = lmat->v;
191   Mat            newmat;
192 
193   PetscFunctionBegin;
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198 
199   /* No parallel redistribution is currently supported! We should really check each index set
200      to confirm that it is OK; currently the submatrix must have the same row partitioning as the
201      original matrix! */
202 
203   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
204   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
205 
206   /* Check submatrix call */
207   if (scall == MAT_REUSE_MATRIX) {
208     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
209     /* Really need to test rows and column sizes! */
210     newmat = *B;
211   } else {
212     /* Create and fill new matrix */
213     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
214     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
215     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
216     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
217   }
218 
219   /* Now extract the data pointers and do the copy, column at a time */
220   newmatd = (Mat_MPIDense*)newmat->data;
221   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
222 
223   for (i=0; i<ncols; i++) {
224     av = v + nlrows*icol[i];
225     for (j=0; j<nrows; j++) {
226       *bv++ = av[irow[j] - rstart];
227     }
228   }
229 
230   /* Assemble the matrices so that the correct flags are set */
231   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233 
234   /* Free work space */
235   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
236   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
237   *B = newmat;
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatRestoreArray_MPIDense"
243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
244 {
245   PetscFunctionBegin;
246   PetscFunctionReturn(0);
247 }
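/* A minimal usage sketch (hypothetical caller): the pointer returned by MatGetArray() is
   the raw local storage of the inner MATSEQDENSE block, column-major with leading
   dimension equal to the local row count. */
#if 0
static PetscErrorCode ExampleScaleFirstColumn(Mat A)
{
  PetscScalar    *v;
  PetscInt       i,m,n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetArray(A,&v);CHKERRQ(ierr);
  for (i=0; i<m; i++) v[i] *= 2.0; /* column 0 occupies the first m entries */
  ierr = MatRestoreArray(A,&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif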
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
252 {
253   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
254   MPI_Comm       comm = ((PetscObject)mat)->comm;
255   PetscErrorCode ierr;
256   PetscInt       nstash,reallocs;
257   InsertMode     addv;
258 
259   PetscFunctionBegin;
260   /* make sure all processors are either in INSERTMODE or ADDMODE */
261   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
262   if (addv == (ADD_VALUES|INSERT_VALUES)) {
263     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
264   }
265   mat->insertmode = addv; /* in case this processor had no cache */
266 
267   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
268   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
269   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
276 {
277   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
278   PetscErrorCode  ierr;
279   PetscInt        i,*row,*col,flg,j,rstart,ncols;
280   PetscMPIInt     n;
281   PetscScalar     *val;
282   InsertMode      addv=mat->insertmode;
283 
284   PetscFunctionBegin;
285   /*  wait on receives */
286   while (1) {
287     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
288     if (!flg) break;
289 
290     for (i=0; i<n;) {
291       /* Now identify the consecutive vals belonging to the same row */
292       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
293       if (j < n) ncols = j-i;
294       else       ncols = n-i;
295       /* Now assemble all these values with a single function call */
296       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
297       i = j;
298     }
299   }
300   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
301 
302   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
304 
305   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
306     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
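/* The receive loop above batches consecutive stash entries that share a row, so each
   group becomes one MatSetValues_MPIDense() call.  A standalone sketch of the grouping
   on hypothetical data: */
#if 0
static void ExampleGroupByRow(void)
{
  int row[6] = {3,3,3,7,7,9}; /* row indices as they arrive from the stash */
  int i,j,ncols,n = 6;

  for (i=0; i<n;) {
    for (j=i; j<n; j++) { if (row[j] != row[i]) break; }
    ncols = j - i; /* entries i..j-1 share row[i] */
    /* ...one MatSetValues-style call covering ncols column/value pairs... */
    i = j;
  }
}
#endif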
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /* the code does not do the diagonal entries correctly unless the
324    matrix is square and the column and row ownerships are identical.
325    This is a BUG. The only way to fix it seems to be to access
326    mdn->A and mdn->B directly and not through the MatZeroRows()
327    routine.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroRows_MPIDense"
331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
332 {
333   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
334   PetscErrorCode ierr;
335   PetscInt       i,*owners = A->rmap.range;
336   PetscInt       *nprocs,j,idx,nsends;
337   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
338   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
339   PetscInt       *lens,*lrows,*values;
340   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
341   MPI_Comm       comm = ((PetscObject)A)->comm;
342   MPI_Request    *send_waits,*recv_waits;
343   MPI_Status     recv_status,*send_status;
344   PetscTruth     found;
345 
346   PetscFunctionBegin;
347   /*  first count number of contributors to each processor */
348   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
349   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
351   for (i=0; i<N; i++) {
352     idx = rows[i];
353     found = PETSC_FALSE;
354     for (j=0; j<size; j++) {
355       if (idx >= owners[j] && idx < owners[j+1]) {
356         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
357       }
358     }
359     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
360   }
361   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
362 
363   /* inform other processors of number of messages and max length*/
364   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
365 
366   /* post receives:   */
367   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
368   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
369   for (i=0; i<nrecvs; i++) {
370     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
371   }
372 
373   /* do sends:
374       1) starts[i] gives the starting index in svalues for stuff going to
375          the ith processor
376   */
377   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
378   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
379   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
380   starts[0]  = 0;
381   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
382   for (i=0; i<N; i++) {
383     svalues[starts[owner[i]]++] = rows[i];
384   }
385 
386   starts[0] = 0;
387   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
388   count = 0;
389   for (i=0; i<size; i++) {
390     if (nprocs[2*i+1]) {
391       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
392     }
393   }
394   ierr = PetscFree(starts);CHKERRQ(ierr);
395 
396   base = owners[rank];
397 
398   /*  wait on receives */
399   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
400   source = lens + nrecvs;
401   count  = nrecvs; slen = 0;
402   while (count) {
403     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
404     /* unpack receives into our local space */
405     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
406     source[imdex]  = recv_status.MPI_SOURCE;
407     lens[imdex]    = n;
408     slen += n;
409     count--;
410   }
411   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
412 
413   /* move the data into the send scatter */
414   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
415   count = 0;
416   for (i=0; i<nrecvs; i++) {
417     values = rvalues + i*nmax;
418     for (j=0; j<lens[i]; j++) {
419       lrows[count++] = values[j] - base;
420     }
421   }
422   ierr = PetscFree(rvalues);CHKERRQ(ierr);
423   ierr = PetscFree(lens);CHKERRQ(ierr);
424   ierr = PetscFree(owner);CHKERRQ(ierr);
425   ierr = PetscFree(nprocs);CHKERRQ(ierr);
426 
427   /* actually zap the local rows */
428   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
429   ierr = PetscFree(lrows);CHKERRQ(ierr);
430 
431   /* wait on sends */
432   if (nsends) {
433     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
434     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
435     ierr = PetscFree(send_status);CHKERRQ(ierr);
436   }
437   ierr = PetscFree(send_waits);CHKERRQ(ierr);
438   ierr = PetscFree(svalues);CHKERRQ(ierr);
439 
440   PetscFunctionReturn(0);
441 }
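/* A minimal usage sketch (hypothetical caller): each process may list any global rows;
   the rendezvous above routes every index to its owning process before the local
   MatZeroRows() on the inner matrix. */
#if 0
static PetscErrorCode ExampleZeroRows(Mat A)
{
  PetscInt       rows[2] = {0,5}; /* global rows, possibly owned by other processes */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr); /* zero rows, place 1.0 on the diagonal */
  PetscFunctionReturn(0);
}
#endif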
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatMult_MPIDense"
445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
446 {
447   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
452   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatMultAdd_MPIDense"
459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatMultTranspose_MPIDense"
473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
474 {
475   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
476   PetscErrorCode ierr;
477   PetscScalar    zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(yy,zero);CHKERRQ(ierr);
481   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
482   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
483   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
492   PetscErrorCode ierr;
493 
494   PetscFunctionBegin;
495   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
496   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
497   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
498   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
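/* The four multiply kernels share one pattern: forward-scatter x into the local work
   vector lvec for A*x, or reverse-scatter/add the local result for A^T*x.  A minimal
   usage sketch (hypothetical caller, conforming vector layouts assumed): */
#if 0
static PetscErrorCode ExampleMultiply(Mat A,Vec x,Vec y,Vec z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMult(A,x,y);CHKERRQ(ierr);               /* y = A x     */
  ierr = MatMultTransposeAdd(A,y,x,z);CHKERRQ(ierr); /* z = x + A'y */
  PetscFunctionReturn(0);
}
#endif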
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatGetDiagonal_MPIDense"
504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
505 {
506   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
507   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
508   PetscErrorCode ierr;
509   PetscInt       len,i,n,m = A->rmap.n,radd;
510   PetscScalar    *x,zero = 0.0;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(v,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
515   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
516   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
517   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
518   radd = A->rmap.rstart*m;
519   for (i=0; i<len; i++) {
520     x[i] = aloc->v[radd + i*m + i];
521   }
522   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
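/* Indexing note for the loop above: aloc->v is column-major with leading dimension m, so
   global diagonal entry (rstart+i,rstart+i) sits in column rstart+i, local row i, i.e. at
   offset (rstart+i)*m + i = radd + i*m + i.  For example, with m = 2 and rstart = 2,
   entry (3,3) is at offset 3*2 + 1 = 7. */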
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatDestroy_MPIDense"
528 PetscErrorCode MatDestroy_MPIDense(Mat mat)
529 {
530   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
531   PetscErrorCode ierr;
532 #if defined(PETSC_HAVE_PLAPACK)
533   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
534 #endif
535 
536   PetscFunctionBegin;
537 
538 #if defined(PETSC_USE_LOG)
539   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
540 #endif
541   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
542   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
543   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
544   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
545 #if defined(PETSC_HAVE_PLAPACK)
546   if (lu) {
547     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
548     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
549     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
550 
551     if (lu->is_pla) {
552       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
553       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
554       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
555     }
556   }
557 #endif
558 
559   ierr = PetscFree(mdn);CHKERRQ(ierr);
560   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatView_MPIDense_Binary"
571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
572 {
573   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
574   PetscErrorCode    ierr;
575   PetscViewerFormat format;
576   int               fd;
577   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
578   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
579   PetscScalar       *work,*v,*vv;
580   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
581   MPI_Status        status;
582 
583   PetscFunctionBegin;
584   if (mdn->size == 1) {
585     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
586   } else {
587     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
588     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
589     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
590 
591     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
592     if (format == PETSC_VIEWER_BINARY_NATIVE) {
593 
594       if (!rank) {
595 	/* store the matrix as a dense matrix */
596 	header[0] = MAT_FILE_COOKIE;
597 	header[1] = mat->rmap.N;
598 	header[2] = N;
599 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
600 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
601 
602 	/* get largest work array needed for transposing array */
603         mmax = mat->rmap.n;
604         for (i=1; i<size; i++) {
605           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
606         }
607 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
608 
609 	/* write out local array, by rows */
610         m    = mat->rmap.n;
611 	v    = a->v;
612         for (j=0; j<N; j++) {
613           for (i=0; i<m; i++) {
614 	    work[j + i*N] = *v++;
615 	  }
616 	}
617 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
618         /* get largest work array to receive messages from other processes, excludes process zero */
619         mmax = 0;
620         for (i=1; i<size; i++) {
621           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
622         }
623 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
624         for (k=1; k<size; k++) {
625           v    = vv; /* reset to the start of the receive buffer for each process */
626           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
627           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
628 
629           for (j=0; j<N; j++) {
630             for (i=0; i<m; i++) {
631               work[j + i*N] = *v++;
632 	    }
633 	  }
634 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
635         }
636         ierr = PetscFree(work);CHKERRQ(ierr);
637         ierr = PetscFree(vv);CHKERRQ(ierr);
638       } else {
639         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
640       }
641     }
642   }
643   PetscFunctionReturn(0);
644 }
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
648 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
649 {
650   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
651   PetscErrorCode    ierr;
652   PetscMPIInt       size = mdn->size,rank = mdn->rank;
653   PetscViewerType   vtype;
654   PetscTruth        iascii,isdraw;
655   PetscViewer       sviewer;
656   PetscViewerFormat format;
657 #if defined(PETSC_HAVE_PLAPACK)
658   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
659 #endif
660 
661   PetscFunctionBegin;
662   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
663   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
664   if (iascii) {
665     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
666     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
667     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
668       MatInfo info;
669       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
670       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
671                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
672       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
673       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
674 #if defined(PETSC_HAVE_PLAPACK)
675       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
676       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
677       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
680 #endif
681       PetscFunctionReturn(0);
682     } else if (format == PETSC_VIEWER_ASCII_INFO) {
683       PetscFunctionReturn(0);
684     }
685   } else if (isdraw) {
686     PetscDraw  draw;
687     PetscTruth isnull;
688 
689     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
690     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
691     if (isnull) PetscFunctionReturn(0);
692   }
693 
694   if (size == 1) {
695     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
696   } else {
697     /* assemble the entire matrix onto first processor. */
698     Mat         A;
699     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
700     PetscInt    *cols;
701     PetscScalar *vals;
702 
703     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
704     if (!rank) {
705       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
706     } else {
707       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
708     }
709     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
710     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
711     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
712     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
713 
714     /* Copy the matrix ... This isn't the most efficient means,
715        but it's quick for now */
716     A->insertmode = INSERT_VALUES;
717     row = mat->rmap.rstart; m = mdn->A->rmap.n;
718     for (i=0; i<m; i++) {
719       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
720       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
721       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
722       row++;
723     }
724 
725     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
727     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
728     if (!rank) {
729       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
730     }
731     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
733     ierr = MatDestroy(A);CHKERRQ(ierr);
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatView_MPIDense"
740 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
741 {
742   PetscErrorCode ierr;
743   PetscTruth     iascii,isbinary,isdraw,issocket;
744 
745   PetscFunctionBegin;
746 
747   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
748   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
749   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
751 
752   if (iascii || issocket || isdraw) {
753     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
754   } else if (isbinary) {
755     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
756   } else {
757     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatGetInfo_MPIDense"
764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
765 {
766   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
767   Mat            mdn = mat->A;
768   PetscErrorCode ierr;
769   PetscReal      isend[5],irecv[5];
770 
771   PetscFunctionBegin;
772   info->rows_global    = (double)A->rmap.N;
773   info->columns_global = (double)A->cmap.N;
774   info->rows_local     = (double)A->rmap.n;
775   info->columns_local  = (double)A->cmap.N;
776   info->block_size     = 1.0;
777   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
778   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779   isend[3] = info->memory;  isend[4] = info->mallocs;
780   if (flag == MAT_LOCAL) {
781     info->nz_used      = isend[0];
782     info->nz_allocated = isend[1];
783     info->nz_unneeded  = isend[2];
784     info->memory       = isend[3];
785     info->mallocs      = isend[4];
786   } else if (flag == MAT_GLOBAL_MAX) {
787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
788     info->nz_used      = irecv[0];
789     info->nz_allocated = irecv[1];
790     info->nz_unneeded  = irecv[2];
791     info->memory       = irecv[3];
792     info->mallocs      = irecv[4];
793   } else if (flag == MAT_GLOBAL_SUM) {
794     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
795     info->nz_used      = irecv[0];
796     info->nz_allocated = irecv[1];
797     info->nz_unneeded  = irecv[2];
798     info->memory       = irecv[3];
799     info->mallocs      = irecv[4];
800   }
801   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
802   info->fill_ratio_needed = 0;
803   info->factor_mallocs    = 0;
804   PetscFunctionReturn(0);
805 }
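/* A minimal usage sketch (hypothetical caller): the five statistics gathered above are
   reduced with MPI_MAX or MPI_SUM according to the flag. */
#if 0
static PetscErrorCode ExampleInfo(Mat A)
{
  MatInfo        info;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"total memory used: %g\n",info.memory);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif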
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatSetOption_MPIDense"
809 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
810 {
811   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
812   PetscErrorCode ierr;
813 
814   PetscFunctionBegin;
815   switch (op) {
816   case MAT_NEW_NONZERO_LOCATIONS:
817   case MAT_NEW_NONZERO_LOCATION_ERR:
818   case MAT_NEW_NONZERO_ALLOCATION_ERR:
819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
820     break;
821   case MAT_ROW_ORIENTED:
822     a->roworiented = flg;
823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
824     break;
825   case MAT_NEW_DIAGONALS:
826   case MAT_USE_HASH_TABLE:
827     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
828     break;
829   case MAT_IGNORE_OFF_PROC_ENTRIES:
830     a->donotstash = flg;
831     break;
832   case MAT_SYMMETRIC:
833   case MAT_STRUCTURALLY_SYMMETRIC:
834   case MAT_HERMITIAN:
835   case MAT_SYMMETRY_ETERNAL:
836   case MAT_IGNORE_LOWER_TRIANGULAR:
837     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
838     break;
839   default:
840     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDiagonalScale_MPIDense"
848 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
849 {
850   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
851   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852   PetscScalar    *l,*r,x,*v;
853   PetscErrorCode ierr;
854   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
855 
856   PetscFunctionBegin;
857   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
858   if (ll) {
859     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
860     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
861     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
862     for (i=0; i<m; i++) {
863       x = l[i];
864       v = mat->v + i;
865       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
866     }
867     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
868     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
869   }
870   if (rr) {
871     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
872     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
873     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
876     for (i=0; i<n; i++) {
877       x = r[i];
878       v = mat->v + i*m;
879       for (j=0; j<m; j++) { (*v++) *= x;}
880     }
881     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
882     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
883   }
884   PetscFunctionReturn(0);
885 }
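/* A minimal usage sketch (hypothetical caller): scales A in place to diag(l)*A*diag(r).
   The left vector must match the row layout; the right vector is scattered into lvec
   first because every process stores all N columns of its local rows. */
#if 0
static PetscErrorCode ExampleScaleBothSides(Mat A,Vec l,Vec r)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* pass PETSC_NULL to skip either side */
  PetscFunctionReturn(0);
}
#endif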
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatNorm_MPIDense"
889 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
890 {
891   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
892   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
893   PetscErrorCode ierr;
894   PetscInt       i,j;
895   PetscReal      sum = 0.0;
896   PetscScalar    *v = mat->v;
897 
898   PetscFunctionBegin;
899   if (mdn->size == 1) {
900     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
901   } else {
902     if (type == NORM_FROBENIUS) {
903       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
904 #if defined(PETSC_USE_COMPLEX)
905         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
906 #else
907         sum += (*v)*(*v); v++;
908 #endif
909       }
910       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
911       *nrm = sqrt(*nrm);
912       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
913     } else if (type == NORM_1) {
914       PetscReal *tmp,*tmp2;
915       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
916       tmp2 = tmp + A->cmap.N;
917       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
918       *nrm = 0.0;
919       v = mat->v;
920       for (j=0; j<mdn->A->cmap.n; j++) {
921         for (i=0; i<mdn->A->rmap.n; i++) {
922           tmp[j] += PetscAbsScalar(*v);  v++;
923         }
924       }
925       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
926       for (j=0; j<A->cmap.N; j++) {
927         if (tmp2[j] > *nrm) *nrm = tmp2[j];
928       }
929       ierr = PetscFree(tmp);CHKERRQ(ierr);
930       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
931     } else if (type == NORM_INFINITY) { /* max row norm */
932       PetscReal ntemp;
933       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
934       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
935     } else {
936       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
937     }
938   }
939   PetscFunctionReturn(0);
940 }
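/* Summary of the reductions above: NORM_FROBENIUS sums |a_ij|^2 locally, reduces with
   MPI_SUM, then takes the square root; NORM_1 (max column sum) reduces per-column sums
   with MPI_SUM and takes the max; NORM_INFINITY (max row sum) is computed locally and
   reduced with MPI_MAX. */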
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "MatTranspose_MPIDense"
944 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
945 {
946   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
947   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
948   Mat            B;
949   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
950   PetscErrorCode ierr;
951   PetscInt       j,i;
952   PetscScalar    *v;
953 
954   PetscFunctionBegin;
955   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
956   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
957     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
958     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
959     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
960     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
961   } else {
962     B = *matout;
963   }
964 
965   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
966   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
967   for (i=0; i<m; i++) rwork[i] = rstart + i;
968   for (j=0; j<n; j++) {
969     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
970     v   += m;
971   }
972   ierr = PetscFree(rwork);CHKERRQ(ierr);
973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
975   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
976     *matout = B;
977   } else {
978     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
979   }
980   PetscFunctionReturn(0);
981 }
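/* A minimal usage sketch (hypothetical caller): out-of-place transpose; in-place reuse
   (*matout == A) is only supported for square matrices, as checked above. */
#if 0
static PetscErrorCode ExampleTranspose(Mat A)
{
  Mat            At;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatDestroy(At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif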
982 
983 #include "petscblaslapack.h"
984 #undef __FUNCT__
985 #define __FUNCT__ "MatScale_MPIDense"
986 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
987 {
988   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
989   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
990   PetscScalar    oalpha = alpha;
991   PetscErrorCode ierr;
992   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
993 
994   PetscFunctionBegin;
995   BLASscal_(&nz,&oalpha,a->v,&one);
996   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1004 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1005 {
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #if defined(PETSC_HAVE_PLAPACK)
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatMPIDenseCreatePlapack"
1017 PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
1018 {
1019   Mat_Plapack    *lu;
1020   PetscErrorCode ierr;
1021   PetscInt       M=A->rmap.N;
1022   MPI_Comm       comm=((PetscObject)A)->comm;
1023   PetscMPIInt    size;
1024 
1025   PetscFunctionBegin;
1026   if (A->spptr) PetscFunctionReturn(0);
1027   ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
1028   A->spptr = (void*)lu;
1029 
1030   lu = (Mat_Plapack*)(A->spptr);
1031 
1032   /* Set default Plapack parameters */
1033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1034   lu->nb     = M/size;
1035   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1036 
1037   /* Set runtime options */
1038   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1039     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1040   PetscOptionsEnd();
1041 
1042   /* Create object distribution template */
1043   lu->templ = NULL;
1044   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1045 
1046   /* Set the datatype */
1047 #if defined(PETSC_USE_COMPLEX)
1048   lu->datatype = MPI_DOUBLE_COMPLEX;
1049 #else
1050   lu->datatype = MPI_DOUBLE;
1051 #endif
1052 
1053   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1054 
1055 
1056   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1057   PetscFunctionReturn(0);
1058 }
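/* Example of the default block size computed above: M = 10 rows on size = 4 processes
   gives nb = 10/4 = 2, and since 10 - 2*4 != 0 it is bumped to nb = 3, i.e. one block of
   (at most) nb rows per process with no cyclic wraparound. */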
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1062 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1063 {
1064   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1065   PetscErrorCode ierr;
1066   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1067   PetscScalar    *array;
1068   PetscReal      one = 1.0;
1069 
1070   PetscFunctionBegin;
1071   /* Copy A into F->lu->A */
1072   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1073   ierr = PLA_API_begin();CHKERRQ(ierr);
1074   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1075   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1076   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1077   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1078   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1079   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1080   ierr = PLA_API_end();CHKERRQ(ierr);
1081   lu->rstart = rstart;
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1087 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1088 {
1089   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1090   PetscErrorCode ierr;
1091   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1092   PetscScalar    *array;
1093   PetscReal      one = 1.0;
1094 
1095   PetscFunctionBegin;
1096   /* Copy F into A->lu->A */
1097   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1098   ierr = PLA_API_begin();CHKERRQ(ierr);
1099   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1100   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1101   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1102   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1103   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1104   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1105   ierr = PLA_API_end();CHKERRQ(ierr);
1106   lu->rstart = rstart;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1112 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1113 {
1114   PetscErrorCode ierr;
1115   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1116   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1117   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1118   PLA_Obj        alpha = NULL,beta = NULL;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1122   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1123 
1124   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1125   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1126 
1127   /* do the multiply in PLA  */
1128   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1129   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1130   CHKMEMQ;
1131 
1132   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1133   CHKMEMQ;
1134   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1135   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1136 
1137   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1138   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1144 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1145 {
1146   PetscErrorCode ierr;
1147   PetscInt       m=A->rmap.n,n=B->cmap.n;
1148   Mat            Cmat;
1149 
1150   PetscFunctionBegin;
1151   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d",A->cmap.n,B->rmap.n);
1152   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1153   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1154   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1155   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1156   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1157   ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
1158   ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
1159   ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
1160   *C = Cmat;
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1166 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1167 {
1168   PetscErrorCode ierr;
1169 
1170   PetscFunctionBegin;
1171   if (scall == MAT_INITIAL_MATRIX){
1172     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1173   }
1174   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
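/* A minimal usage sketch (hypothetical caller): forms C = A*B through PLAPACK.  The
   symbolic phase creates C and the PLAPACK mirrors of all three matrices; with
   MAT_REUSE_MATRIX only the numeric product is redone. */
#if 0
static PetscErrorCode ExampleMatMatMult(Mat A,Mat B)
{
  Mat            C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr); /* recompute */
  ierr = MatDestroy(C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif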
1177 
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatSolve_MPIDense"
1181 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1182 {
1183   MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
1184   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1185   PetscErrorCode ierr;
1186   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1187   PetscScalar    *array;
1188   PetscReal      one = 1.0;
1189   PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
1190   PLA_Obj        v_pla = NULL;
1191   PetscScalar    *loc_buf;
1192   Vec            loc_x;
1193 
1194   PetscFunctionBegin;
1195   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1196 
1197   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1198   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1199   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1200 
1201   /* Copy b into rhs_pla */
1202   ierr = PLA_API_begin();CHKERRQ(ierr);
1203   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1204   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1205   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1206   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1207   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1208   ierr = PLA_API_end();CHKERRQ(ierr);
1209 
1210   if (A->factor == FACTOR_LU){
1211     /* Apply the permutations to the right hand sides */
1212     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1213 
1214     /* Solve L y = b, overwriting b with y */
1215     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1216 
1217     /* Solve U x = y (=b), overwriting b with x */
1218     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1219   } else { /* FACTOR_CHOLESKY */
1220     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1221     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1222   }
1223 
1224   /* Copy PLAPACK x into Petsc vector x  */
1225   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1226   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1227   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1228   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1229   if (!lu->pla_solved){
1230 
1231     ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
1232     ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
1233     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1234 
1235     /* Create IS and cts for VecScatterring */
1236     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1237     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1238     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1239     idx_petsc = idx_pla + loc_m;
1240 
1241     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1242     for (i=0; i<loc_m; i+=lu->nb){
1243       j = 0;
1244       while (j < lu->nb && i+j < loc_m){
1245         idx_petsc[i+j] = rstart + j; j++;
1246       }
1247       rstart += size*lu->nb;
1248     }
1249 
1250     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1251 
1252     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1253     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1254     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1255     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1256   }
1257   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1258   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1259 
1260   /* Free data */
1261   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1262   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1263 
1264   lu->pla_solved = PETSC_TRUE;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1270 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1271 {
1272   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1273   PetscErrorCode ierr;
1274   PetscInt       info_pla;
1275 
1276   PetscFunctionBegin;
1277   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1278 
1279   /* Copy A into F->lu->A */
1280   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1281 
1282   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1283   info_pla = PLA_LU(lu->A,lu->pivots);
1284   if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1285 
1286   (*F)->assembled    = PETSC_TRUE;
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1292 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1293 {
1294   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1295   PetscErrorCode ierr;
1296   PetscInt       info_pla;
1297 
1298   PetscFunctionBegin;
1299 
1300   /* Copy A into F->lu->A */
1301   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1302 
1303   /* Factor P A -> Chol */
1304   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1305   if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1306 
1307   (*F)->assembled = PETSC_TRUE;
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1314 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1315 {
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   /* Create the factorization matrix */
1320   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1321   ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1322   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1323   ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 /* Note the Petsc r and c permutations are ignored */
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1330 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1331 {
1332   PetscErrorCode ierr;
1333   PetscInt       M = A->rmap.N;
1334   Mat_Plapack    *lu;
1335 
1336   PetscFunctionBegin;
1337   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1338   lu = (Mat_Plapack*)(*F)->spptr;
1339   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1340   (*F)->factor = FACTOR_LU;
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 /* Note the Petsc perm permutation is ignored */
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1347 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1348 {
1349   PetscErrorCode ierr;
1350   PetscTruth     issymmetric,set;
1351 
1352   PetscFunctionBegin;
1353   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1354   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1355   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1356   (*F)->factor = FACTOR_CHOLESKY;
1357   PetscFunctionReturn(0);
1358 }
1359 #endif
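/* A minimal usage sketch (hypothetical caller) of the PLAPACK factor/solve path when
   PETSc is configured with PLAPACK.  The ordering index sets are accepted but ignored,
   as noted above. */
#if 0
static PetscErrorCode ExampleLUSolve(Mat A,Vec b,Vec x)
{
  Mat            F;
  IS             r,c;
  MatFactorInfo  info;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetOrdering(A,MATORDERING_NATURAL,&r,&c);CHKERRQ(ierr);
  ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
  ierr = MatLUFactorSymbolic(A,r,c,&info,&F);CHKERRQ(ierr); /* creates F and its PLAPACK mirror */
  ierr = MatLUFactorNumeric(A,&info,&F);CHKERRQ(ierr);      /* P A = L U inside PLAPACK */
  ierr = MatSolve(F,b,x);CHKERRQ(ierr);                     /* pivoted triangular solves */
  ierr = ISDestroy(r);CHKERRQ(ierr);
  ierr = ISDestroy(c);CHKERRQ(ierr);
  ierr = MatDestroy(F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif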
1360 
1361 /* -------------------------------------------------------------------*/
1362 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1363        MatGetRow_MPIDense,
1364        MatRestoreRow_MPIDense,
1365        MatMult_MPIDense,
1366 /* 4*/ MatMultAdd_MPIDense,
1367        MatMultTranspose_MPIDense,
1368        MatMultTransposeAdd_MPIDense,
1369 #if defined(PETSC_HAVE_PLAPACK)
1370        MatSolve_MPIDense,
1371 #else
1372        0,
1373 #endif
1374        0,
1375        0,
1376 /*10*/ 0,
1377        0,
1378        0,
1379        0,
1380        MatTranspose_MPIDense,
1381 /*15*/ MatGetInfo_MPIDense,
1382        MatEqual_MPIDense,
1383        MatGetDiagonal_MPIDense,
1384        MatDiagonalScale_MPIDense,
1385        MatNorm_MPIDense,
1386 /*20*/ MatAssemblyBegin_MPIDense,
1387        MatAssemblyEnd_MPIDense,
1388        0,
1389        MatSetOption_MPIDense,
1390        MatZeroEntries_MPIDense,
1391 /*25*/ MatZeroRows_MPIDense,
1392 #if defined(PETSC_HAVE_PLAPACK)
1393        MatLUFactorSymbolic_MPIDense,
1394        MatLUFactorNumeric_MPIDense,
1395        MatCholeskyFactorSymbolic_MPIDense,
1396        MatCholeskyFactorNumeric_MPIDense,
1397 #else
1398        0,
1399        0,
1400        0,
1401        0,
1402 #endif
1403 /*30*/ MatSetUpPreallocation_MPIDense,
1404        0,
1405        0,
1406        MatGetArray_MPIDense,
1407        MatRestoreArray_MPIDense,
1408 /*35*/ MatDuplicate_MPIDense,
1409        0,
1410        0,
1411        0,
1412        0,
1413 /*40*/ 0,
1414        MatGetSubMatrices_MPIDense,
1415        0,
1416        MatGetValues_MPIDense,
1417        0,
1418 /*45*/ 0,
1419        MatScale_MPIDense,
1420        0,
1421        0,
1422        0,
1423 /*50*/ 0,
1424        0,
1425        0,
1426        0,
1427        0,
1428 /*55*/ 0,
1429        0,
1430        0,
1431        0,
1432        0,
1433 /*60*/ MatGetSubMatrix_MPIDense,
1434        MatDestroy_MPIDense,
1435        MatView_MPIDense,
1436        0,
1437        0,
1438 /*65*/ 0,
1439        0,
1440        0,
1441        0,
1442        0,
1443 /*70*/ 0,
1444        0,
1445        0,
1446        0,
1447        0,
1448 /*75*/ 0,
1449        0,
1450        0,
1451        0,
1452        0,
1453 /*80*/ 0,
1454        0,
1455        0,
1456        0,
1457 /*84*/ MatLoad_MPIDense,
1458        0,
1459        0,
1460        0,
1461        0,
1462        0,
1463 /*90*/
1464 #if defined(PETSC_HAVE_PLAPACK)
1465        MatMatMult_MPIDense_MPIDense,
1466        MatMatMultSymbolic_MPIDense_MPIDense,
1467        MatMatMultNumeric_MPIDense_MPIDense,
1468 #else
1469        0,
1470        0,
1471        0,
1472 #endif
1473        0,
1474 /*95*/ 0,
1475        0,
1476        0,
1477        0};
1478 
1479 EXTERN_C_BEGIN
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1482 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1483 {
1484   Mat_MPIDense   *a;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   mat->preallocated = PETSC_TRUE;
1489   /* Note: for now, when a data array is provided, this assumes the user has
1490      correctly allocated the local dense storage space; error checking should be added. */
1491 
1492   a    = (Mat_MPIDense*)mat->data;
1493   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1494   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1495   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1496   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1497   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 EXTERN_C_END
1501 
1502 EXTERN_C_BEGIN
1503 #if defined(PETSC_HAVE_PLAPACK)
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1506 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1507 {
1508   PetscFunctionBegin;
1509   /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") silently works (since PLAPACK is used by default when available) */
1510   PetscFunctionReturn(0);
1511 }
1512 #endif
1513 EXTERN_C_END
1514 
1515 /*MC
1516    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1517 
1518    Options Database Keys:
1519 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1520 
1521   Level: beginner
1522 
1523   MATMPIDENSE matrices may use parallel dense direct solvers (LU and Cholesky)
1524   via the external package PLAPACK, if PLAPACK is installed
1525   (run config/configure.py with the option --download-plapack).
1526 
1527   Options Database Keys when PLAPACK is used:
1529 . -plapack_nprows <n> - row dimension of the 2D processor mesh
1530 . -plapack_npcols <n> - column dimension of the 2D processor mesh
1531 . -mat_plapack_nb <n> - block size of the template vector
1532 . -plapack_nb_alg <n> - algorithmic block size
1533 - -plapack_ckerror <n> - error checking flag (0 disables PLAPACK error checking)
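
   Example (a hedged sketch; "myapp" is a hypothetical executable calling
   MatSetFromOptions(), and the run assumes PETSc was configured with
   --download-plapack): select this type and a 2 x 2 PLAPACK process mesh
   from the command line with
.vb
      mpiexec -n 4 ./myapp -mat_type mpidense -plapack_nprows 2 -plapack_npcols 2
.ve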
1534 
1535 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1536 M*/
1537 
1538 EXTERN_C_BEGIN
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatCreate_MPIDense"
1541 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1542 {
1543   Mat_MPIDense   *a;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1548   mat->data         = (void*)a;
1549   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1550   mat->factor       = 0;
1551   mat->mapping      = 0;
1552 
1553   mat->insertmode = NOT_SET_VALUES;
1554   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1555   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1556 
1557   mat->rmap.bs = mat->cmap.bs = 1;
1558   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1559   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1560   a->nvec = mat->cmap.n;
1561 
1562   /* build cache for off-process entries generated during MatSetValues() */
1563   a->donotstash = PETSC_FALSE;
1564   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1565 
1566   /* work vector and scatter context used in the matrix-vector product */
1567   a->lvec        = 0;
1568   a->Mvctx       = 0;
1569   a->roworiented = PETSC_TRUE;
1570 
1571   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1572                                      "MatGetDiagonalBlock_MPIDense",
1573                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1574   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1575                                      "MatMPIDenseSetPreallocation_MPIDense",
1576                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1577   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1578                                      "MatMatMult_MPIAIJ_MPIDense",
1579                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1580   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1581                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1582                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1583   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1584                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1585                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1586 #if defined(PETSC_HAVE_PLAPACK)
1587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1588                                      "MatSetSolverType_MPIDense_PLAPACK",
1589                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1590 #endif
1591   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1592 
1593   PetscFunctionReturn(0);
1594 }
1595 EXTERN_C_END
1596 
1597 /*MC
1598    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1599 
1600    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1601    and MATMPIDENSE otherwise.
1602 
1603    Options Database Keys:
1604 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1605 
1606   Level: beginner
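
   Example (a minimal sketch; "myapp" is a hypothetical executable calling
   MatSetFromOptions()):
.vb
      ./myapp -mat_type dense                  (one process: becomes "seqdense")
      mpiexec -n 4 ./myapp -mat_type dense     (several processes: becomes "mpidense")
.ve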
1607 
1608 
1609 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1610 M*/
1611 
1612 EXTERN_C_BEGIN
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatCreate_Dense"
1615 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1616 {
1617   PetscErrorCode ierr;
1618   PetscMPIInt    size;
1619 
1620   PetscFunctionBegin;
1621   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1622   if (size == 1) {
1623     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1624   } else {
1625     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 EXTERN_C_END
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1633 /*@C
1634    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1635 
1636    Not collective
1637 
1638    Input Parameters:
1639 +  A - the matrix
1640 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1641    to control all matrix memory allocation.
1642 
1643    Notes:
1644    The dense format is fully compatible with standard Fortran 77
1645    storage by columns.
1646 
1647    The data input variable is intended primarily for Fortran programmers
1648    who wish to allocate their own matrix memory space.  Most users should
1649    set data=PETSC_NULL.
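
   Example (a minimal sketch; assumes comm and the sizes m, n, M, N are
   already defined, and lets PETSc allocate the storage):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
.ve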
1650 
1651    Level: intermediate
1652 
1653 .keywords: matrix,dense, parallel
1654 
1655 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1656 @*/
1657 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1658 {
1659   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1660 
1661   PetscFunctionBegin;
1662   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1663   if (f) {
1664     ierr = (*f)(mat,data);CHKERRQ(ierr);
1665   }
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatCreateMPIDense"
1671 /*@C
1672    MatCreateMPIDense - Creates a parallel matrix in dense format.
1673 
1674    Collective on MPI_Comm
1675 
1676    Input Parameters:
1677 +  comm - MPI communicator
1678 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1679 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1680 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1681 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1682 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1683    to control all matrix memory allocation.
1684 
1685    Output Parameter:
1686 .  A - the matrix
1687 
1688    Notes:
1689    The dense format is fully compatible with standard Fortran 77
1690    storage by columns.
1691 
1692    The data input variable is intended primarily for Fortran programmers
1693    who wish to allocate their own matrix memory space.  Most users should
1694    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1695 
1696    The user MUST specify either the local or global matrix dimensions
1697    (possibly both).
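
   Example (a minimal sketch; PETSc chooses the local sizes and allocates
   the storage):
.vb
      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
.ve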
1698 
1699    Level: intermediate
1700 
1701 .keywords: matrix,dense, parallel
1702 
1703 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1704 @*/
1705 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1706 {
1707   PetscErrorCode ierr;
1708   PetscMPIInt    size;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1712   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1713   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1714   if (size > 1) {
1715     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1716     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1717   } else {
1718     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1719     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatDuplicate_MPIDense"
1726 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1727 {
1728   Mat            mat;
1729   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   *newmat       = 0;
1734   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1735   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1736   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1737   a                 = (Mat_MPIDense*)mat->data;
1738   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1739   mat->factor       = A->factor;
1740   mat->assembled    = PETSC_TRUE;
1741   mat->preallocated = PETSC_TRUE;
1742 
1743   mat->rmap.rstart     = A->rmap.rstart;
1744   mat->rmap.rend       = A->rmap.rend;
1745   a->size              = oldmat->size;
1746   a->rank              = oldmat->rank;
1747   mat->insertmode      = NOT_SET_VALUES;
1748   a->nvec              = oldmat->nvec;
1749   a->donotstash        = oldmat->donotstash;
1750 
1751   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1752   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1753   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1754 
1755   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1756   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1757   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1758 
1759 #if defined(PETSC_HAVE_PLAPACK)
1760   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1761 #endif
1762   *newmat = mat;
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #include "petscsys.h"
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1770 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1771 {
1772   PetscErrorCode ierr;
1773   PetscMPIInt    rank,size;
1774   PetscInt       *rowners,i,m,nz,j;
1775   PetscScalar    *array,*vals,*vals_ptr;
1776   MPI_Status     status;
1777 
1778   PetscFunctionBegin;
1779   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1780   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1781 
1782   /* determine ownership of all rows: each process gets M/size rows and the first M%size processes get one extra */
1783   m          = M/size + ((M % size) > rank);
1784   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1785   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1786   rowners[0] = 0;
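  /* after the MPI_Allgather() above, rowners[1..size] holds each process's row
     count; the prefix sum below turns rowners[i] into the first global row
     owned by process i */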
1787   for (i=2; i<=size; i++) {
1788     rowners[i] += rowners[i-1];
1789   }
1790 
1791   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1792   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1793   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1794   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1795   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1796 
1797   if (!rank) {
1798     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1799 
1800     /* read in my part of the matrix numerical values  */
1801     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1802 
1803     /* insert into the matrix row by row; the local array is column-major (entry (i,j) is array[i+j*m]), so the row-ordered file values cannot be read directly into it */
1804     vals_ptr = vals;
1805     for (i=0; i<m; i++) {
1806       for (j=0; j<N; j++) {
1807         array[i + j*m] = *vals_ptr++;
1808       }
1809     }
1810 
1811     /* read in the other processes' parts and ship them out */
1812     for (i=1; i<size; i++) {
1813       nz   = (rowners[i+1] - rowners[i])*N;
1814       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1815       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1816     }
1817   } else {
1818     /* receive numeric values */
1819     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1820 
1821     /* receive message of values */
1822     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1823 
1824     /* insert into the matrix row by row; the local array is column-major (entry (i,j) is array[i+j*m]), so the row-ordered values cannot be received directly into it */
1825     vals_ptr = vals;
1826     for (i=0; i<m; i++) {
1827       for (j=0; j<N; j++) {
1828         array[i + j*m] = *vals_ptr++;
1829       }
1830     }
1831   }
1832   ierr = PetscFree(rowners);CHKERRQ(ierr);
1833   ierr = PetscFree(vals);CHKERRQ(ierr);
       ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr); /* pair with the MatGetArray() above */
1834   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1835   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
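/*
   A sketch of the binary layout consumed by MatLoad_MPIDense() below, inferred
   from the reads it performs (not an authoritative format specification):

      header[0] - MAT_FILE_COOKIE
      header[1] - M, the number of global rows
      header[2] - N, the number of global columns
      header[3] - nz, the total number of nonzeros, or MATRIX_BINARY_FORMAT_DENSE
                  when the values are stored as a dense, row-ordered array

   In the sparse layout the header is followed by M row lengths (PetscInt),
   nz column indices (PetscInt), and nz numerical values (PetscScalar).
*/
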
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatLoad_MPIDense"
1841 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1842 {
1843   Mat            A;
1844   PetscScalar    *vals,*svals;
1845   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1846   MPI_Status     status;
1847   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1848   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1849   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1850   PetscInt       i,nz,j,rstart,rend;
1851   int            fd = 0;   /* the file descriptor is set and used on process 0 only */
1852   PetscErrorCode ierr;
1853 
1854   PetscFunctionBegin;
1855   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1856   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1857   if (!rank) {
1858     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1859     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1860     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1861   }
1862 
1863   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1864   M = header[1]; N = header[2]; nz = header[3];
1865 
1866   /*
1867        Handle case where matrix is stored on disk as a dense matrix
1868   */
1869   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1870     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1871     PetscFunctionReturn(0);
1872   }
1873 
1874   /* determine ownership of all rows */
1875   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1876   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1877   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1878   rowners[0] = 0;
1879   for (i=2; i<=size; i++) {
1880     rowners[i] += rowners[i-1];
1881   }
1882   rstart = rowners[rank];
1883   rend   = rowners[rank+1];
1884 
1885   /* distribute row lengths to all processors */
1886   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1887   offlens = ourlens + (rend-rstart);
1888   if (!rank) {
1889     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1890     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1891     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1892     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1893     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1894     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1895   } else {
1896     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1897   }
1898 
1899   if (!rank) {
1900     /* calculate the number of nonzeros on each processor */
1901     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1902     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1903     for (i=0; i<size; i++) {
1904       for (j=rowners[i]; j< rowners[i+1]; j++) {
1905         procsnz[i] += rowlengths[j];
1906       }
1907     }
1908     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1909 
1910     /* determine max buffer needed and allocate it */
1911     maxnz = 0;
1912     for (i=0; i<size; i++) {
1913       maxnz = PetscMax(maxnz,procsnz[i]);
1914     }
1915     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1916 
1917     /* read in my part of the matrix column indices  */
1918     nz = procsnz[0];
1919     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1920     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1921 
1922     /* read in everyone else's parts and ship them off */
1923     for (i=1; i<size; i++) {
1924       nz   = procsnz[i];
1925       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1926       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1927     }
1928     ierr = PetscFree(cols);CHKERRQ(ierr);
1929   } else {
1930     /* determine buffer space needed for message */
1931     nz = 0;
1932     for (i=0; i<m; i++) {
1933       nz += ourlens[i];
1934     }
1935     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1936 
1937     /* receive message of column indices */
1938     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1939     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1940     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1941   }
1942 
1943   /* loop over local rows, counting the entries whose columns fall outside the local diagonal block */
1944   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1945   jj = 0;
1946   for (i=0; i<m; i++) {
1947     for (j=0; j<ourlens[i]; j++) {
1948       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1949       jj++;
1950     }
1951   }
1952 
1953   /* create our matrix */
1954   for (i=0; i<m; i++) {
1955     ourlens[i] -= offlens[i];
1956   }
1957   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1958   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1959   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1960   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1961   A = *newmat;
1962   for (i=0; i<m; i++) {
1963     ourlens[i] += offlens[i];
1964   }
1965 
1966   if (!rank) {
1967     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1968 
1969     /* read in my part of the matrix numerical values  */
1970     nz = procsnz[0];
1971     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1972 
1973     /* insert into matrix */
1974     jj      = rstart;
1975     smycols = mycols;
1976     svals   = vals;
1977     for (i=0; i<m; i++) {
1978       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1979       smycols += ourlens[i];
1980       svals   += ourlens[i];
1981       jj++;
1982     }
1983 
1984     /* read in the other processes' parts and ship them out */
1985     for (i=1; i<size; i++) {
1986       nz   = procsnz[i];
1987       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1988       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1989     }
1990     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1991   } else {
1992     /* receive numeric values */
1993     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1994 
1995     /* receive message of values */
1996     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1997     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1998     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1999 
2000     /* insert into matrix */
2001     jj      = rstart;
2002     smycols = mycols;
2003     svals   = vals;
2004     for (i=0; i<m; i++) {
2005       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2006       smycols += ourlens[i];
2007       svals   += ourlens[i];
2008       jj++;
2009     }
2010   }
2011   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2012   ierr = PetscFree(vals);CHKERRQ(ierr);
2013   ierr = PetscFree(mycols);CHKERRQ(ierr);
2014   ierr = PetscFree(rowners);CHKERRQ(ierr);
2015 
2016   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatEqual_MPIDense"
2023 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2024 {
2025   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2026   Mat            a,b;
2027   PetscTruth     flg;
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   a = matA->A;
2032   b = matB->A;
2033   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
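  /* the matrices are equal only if every process finds its local block equal,
     so AND-reduce the local results over the communicator */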
2034   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #if defined(PETSC_HAVE_PLAPACK)
2039 
2040 #undef __FUNCT__
2041 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2042 /*@C
2043   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2044   Level: developer
2045 
2046 .keywords: Petsc, destroy, package, PLAPACK
2047 .seealso: PetscFinalize()
2048 @*/
2049 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2050 {
2051   PetscErrorCode ierr;
2052 
2053   PetscFunctionBegin;
2054   ierr = PLA_Finalize();CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2060 /*@C
2061   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2062   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
2063   when using static libraries.
2064 
2065   Input Parameter:
2066 .  path - The dynamic library path, or PETSC_NULL
2067 
2068   Level: developer
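
   The process mesh and error checking can also be set at runtime, e.g.
.vb
      -plapack_nprows 2 -plapack_npcols 2 -plapack_nb_alg 128 -plapack_ckerror 0
.ve
   (the values are illustrative; the defaults are chosen below).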
2069 
2070 .keywords: Petsc, initialize, package, PLAPACK
2071 .seealso: PetscInitializePackage(), PetscInitialize()
2072 @*/
2073 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
2074 {
2075   MPI_Comm       comm = PETSC_COMM_WORLD;
2076   PetscMPIInt    size;
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   if (!PLA_Initialized(PETSC_NULL)) {
2081 
2082     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2083     Plapack_nprows = 1;
2084     Plapack_npcols = size;
2085 
2086     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2087       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2088       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2089 #if defined(PETSC_USE_DEBUG)
2090       Plapack_ierror = 3;
2091 #else
2092       Plapack_ierror = 0;
2093 #endif
2094       ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2095       if (Plapack_ierror) {
2096         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2097       } else {
2098         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2099       }
2100 
2101       Plapack_nb_alg = 0;
2102       ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2103       if (Plapack_nb_alg) {
2104         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2105       }
2106     PetscOptionsEnd();
2107 
2108     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2109     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2110     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2111   }
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 
2116 #endif
2117