xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 668f157ea6d169bb50bcdb9ebcdd418abd089fa7)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 EXTERN_C_BEGIN
13 #include "PLA.h"
14 EXTERN_C_END
15 
16 typedef struct {
17   PLA_Obj        A,pivots;
18   PLA_Template   templ;
19   MPI_Datatype   datatype;
20   PetscInt       nb,rstart;
21   VecScatter     ctx;
22   IS             is_pla,is_petsc;
23   PetscTruth     pla_solved;
24   MatStructure   mstruct;
25 } Mat_Plapack;
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatDenseGetLocalMatrix"
30 /*@
31 
32       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
33               matrix that stores its locally owned rows; for a MATSEQDENSE matrix the matrix itself is returned.
34 
35     Input Parameter:
36 .      A - the Seq or MPI dense matrix
37 
38     Output Parameter:
39 .      B - the inner matrix
40 
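    Example usage (a minimal sketch; error checking with CHKERRQ() omitted):
.vb
      Mat Alocal;
      MatDenseGetLocalMatrix(A,&Alocal);   /* Alocal references A's local storage; do not destroy it */
.ve
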
41     Level: intermediate
42 
43 @*/
44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45 {
46   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
47   PetscErrorCode ierr;
48   PetscTruth     flg;
49 
50   PetscFunctionBegin;
51   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
52   if (flg) {
53     *B = mat->A;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatGetRow_MPIDense"
62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63 {
64   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
65   PetscErrorCode ierr;
66   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
67 
68   PetscFunctionBegin;
69   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
70   lrow = row - rstart;
71   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatRestoreRow_MPIDense"
77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
83   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
84   PetscFunctionReturn(0);
85 }
86 
87 EXTERN_C_BEGIN
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
91 {
92   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
93   PetscErrorCode ierr;
94   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
95   PetscScalar    *array;
96   MPI_Comm       comm;
97 
98   PetscFunctionBegin;
99   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
100 
101   /* The reuse aspect is not implemented efficiently */
102   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
103 
104   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
105   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
106   ierr = MatCreate(comm,B);CHKERRQ(ierr);
107   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
108   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
109   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
110   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
111   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
113 
114   *iscopy = PETSC_TRUE;
115   PetscFunctionReturn(0);
116 }
117 EXTERN_C_END
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatSetValues_MPIDense"
121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122 {
123   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
124   PetscErrorCode ierr;
125   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126   PetscTruth     roworiented = A->roworiented;
127 
128   PetscFunctionBegin;
129   if (v) PetscValidScalarPointer(v,6);
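  /* Entries for locally owned rows go directly into the local dense block A->A; entries for
     off-process rows are stashed here and communicated to their owners during
     MatAssemblyBegin()/MatAssemblyEnd(), unless MAT_IGNORE_OFF_PROC_ENTRIES was set. */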
130   for (i=0; i<m; i++) {
131     if (idxm[i] < 0) continue;
132     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
133     if (idxm[i] >= rstart && idxm[i] < rend) {
134       row = idxm[i] - rstart;
135       if (roworiented) {
136         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
137       } else {
138         for (j=0; j<n; j++) {
139           if (idxn[j] < 0) continue;
140           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
141           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
142         }
143       }
144     } else {
145       if (!A->donotstash) {
146         if (roworiented) {
147           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
148         } else {
149           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
150         }
151       }
152     }
153   }
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetValues_MPIDense"
159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160 {
161   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
164 
165   PetscFunctionBegin;
166   for (i=0; i<m; i++) {
167     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169     if (idxm[i] >= rstart && idxm[i] < rend) {
170       row = idxm[i] - rstart;
171       for (j=0; j<n; j++) {
172         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
174         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
175       }
176     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
177   }
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatGetArray_MPIDense"
183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
184 {
185   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
186   PetscErrorCode ierr;
187 
188   PetscFunctionBegin;
189   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
190   PetscFunctionReturn(0);
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
196 {
197   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
198   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
199   PetscErrorCode ierr;
200   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
201   const PetscInt *irow,*icol;
202   PetscScalar    *av,*bv,*v = lmat->v;
203   Mat            newmat;
204   IS             iscol_local;
205 
206   PetscFunctionBegin;
207   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
208   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
209   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
210   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
211   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
212   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
213 
214   /* No parallel redistribution is currently supported! We should really check each index set
215      to confirm that it is OK.  ... Currently this only supports a submatrix with the same row
216      partitioning as the original matrix! */
217 
218   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
219   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
220 
221   /* Check submatrix call */
222   if (scall == MAT_REUSE_MATRIX) {
223     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
224     /* Really need to test rows and column sizes! */
225     newmat = *B;
226   } else {
227     /* Create and fill new matrix */
228     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
229     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
230     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
231     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
232   }
233 
234   /* Now extract the data pointers and do the copy, column at a time */
235   newmatd = (Mat_MPIDense*)newmat->data;
236   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
237 
238   for (i=0; i<Ncols; i++) {
239     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
240     for (j=0; j<nrows; j++) {
241       *bv++ = av[irow[j] - rstart];
242     }
243   }
244 
245   /* Assemble the matrices so that the correct flags are set */
246   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248 
249   /* Free work space */
250   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
251   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
      ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
252   *B = newmat;
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatRestoreArray_MPIDense"
258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
259 {
260   PetscFunctionBegin;
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
267 {
268   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
269   MPI_Comm       comm = ((PetscObject)mat)->comm;
270   PetscErrorCode ierr;
271   PetscInt       nstash,reallocs;
272   InsertMode     addv;
273 
274   PetscFunctionBegin;
275   /* make sure all processors are either in INSERTMODE or ADDMODE */
276   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
277   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
278   mat->insertmode = addv; /* in case this processor had no cache */
279 
280   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
281   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
282   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
289 {
290   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
291   PetscErrorCode  ierr;
292   PetscInt        i,*row,*col,flg,j,rstart,ncols;
293   PetscMPIInt     n;
294   PetscScalar     *val;
295   InsertMode      addv=mat->insertmode;
296 
297   PetscFunctionBegin;
298   /*  wait on receives */
299   while (1) {
300     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
301     if (!flg) break;
302 
303     for (i=0; i<n;) {
304       /* Now identify the consecutive vals belonging to the same row */
305       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
306       if (j < n) ncols = j-i;
307       else       ncols = n-i;
308       /* Now assemble all these values with a single function call */
309       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
310       i = j;
311     }
312   }
313   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
314 
315   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
316   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
317 
318   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
319     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatZeroEntries_MPIDense"
326 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
327 {
328   PetscErrorCode ierr;
329   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
330 
331   PetscFunctionBegin;
332   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 /* The code does not handle the diagonal entries correctly unless the
337    matrix is square and the column and row ownerships are identical.
338    This is a BUG. The only way to fix it seems to be to access
339    the local matrix mdn->A directly rather than going through the
340    MatZeroRows() routine.
341 */
342 #undef __FUNCT__
343 #define __FUNCT__ "MatZeroRows_MPIDense"
344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
345 {
346   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
347   PetscErrorCode ierr;
348   PetscInt       i,*owners = A->rmap->range;
349   PetscInt       *nprocs,j,idx,nsends;
350   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
351   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
352   PetscInt       *lens,*lrows,*values;
353   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
354   MPI_Comm       comm = ((PetscObject)A)->comm;
355   MPI_Request    *send_waits,*recv_waits;
356   MPI_Status     recv_status,*send_status;
357   PetscTruth     found;
358 
359   PetscFunctionBegin;
360   /*  first count number of contributors to each processor */
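  /* nprocs[2*j] counts how many of the given rows belong to process j, nprocs[2*j+1] is set
     to 1 if at least one message must be sent to process j, and owner[i] records the owning
     process of rows[i]. */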
361   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
362   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
363   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
364   for (i=0; i<N; i++) {
365     idx = rows[i];
366     found = PETSC_FALSE;
367     for (j=0; j<size; j++) {
368       if (idx >= owners[j] && idx < owners[j+1]) {
369         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
370       }
371     }
372     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
373   }
374   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
375 
376   /* inform other processors of number of messages and max length*/
377   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
378 
379   /* post receives:   */
380   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
381   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
382   for (i=0; i<nrecvs; i++) {
383     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
384   }
385 
386   /* do sends:
387       1) starts[i] gives the starting index in svalues for stuff going to
388          the ith processor
389   */
390   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
391   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
392   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
393   starts[0]  = 0;
394   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
395   for (i=0; i<N; i++) {
396     svalues[starts[owner[i]]++] = rows[i];
397   }
398 
399   starts[0] = 0;
400   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
401   count = 0;
402   for (i=0; i<size; i++) {
403     if (nprocs[2*i+1]) {
404       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
405     }
406   }
407   ierr = PetscFree(starts);CHKERRQ(ierr);
408 
409   base = owners[rank];
410 
411   /*  wait on receives */
412   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
413   count  = nrecvs;
414   slen   = 0;
415   while (count) {
416     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
417     /* unpack receives into our local space */
418     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
419     source[imdex]  = recv_status.MPI_SOURCE;
420     lens[imdex]    = n;
421     slen += n;
422     count--;
423   }
424   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
425 
426   /* move the data into the send scatter */
427   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
428   count = 0;
429   for (i=0; i<nrecvs; i++) {
430     values = rvalues + i*nmax;
431     for (j=0; j<lens[i]; j++) {
432       lrows[count++] = values[j] - base;
433     }
434   }
435   ierr = PetscFree(rvalues);CHKERRQ(ierr);
436   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
437   ierr = PetscFree(owner);CHKERRQ(ierr);
438   ierr = PetscFree(nprocs);CHKERRQ(ierr);
439 
440   /* actually zap the local rows */
441   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
442   ierr = PetscFree(lrows);CHKERRQ(ierr);
443 
444   /* wait on sends */
445   if (nsends) {
446     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
447     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
448     ierr = PetscFree(send_status);CHKERRQ(ierr);
449   }
450   ierr = PetscFree(send_waits);CHKERRQ(ierr);
451   ierr = PetscFree(svalues);CHKERRQ(ierr);
452 
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatMult_MPIDense"
458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
459 {
460   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
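  /* Gather the entire input vector into the local work vector lvec, then apply the locally
     stored (rmap->n by cmap->N) dense block with a sequential multiply. */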
464   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
465   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
467   PetscFunctionReturn(0);
468 }
469 
470 #undef __FUNCT__
471 #define __FUNCT__ "MatMultAdd_MPIDense"
472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
473 {
474   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatMultTranspose_MPIDense"
486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
487 {
488   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
489   PetscErrorCode ierr;
490   PetscScalar    zero = 0.0;
491 
492   PetscFunctionBegin;
493   ierr = VecSet(yy,zero);CHKERRQ(ierr);
494   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
495   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
496   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
503 {
504   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
509   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
510   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
511   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNCT__
516 #define __FUNCT__ "MatGetDiagonal_MPIDense"
517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
518 {
519   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
520   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
521   PetscErrorCode ierr;
522   PetscInt       len,i,n,m = A->rmap->n,radd;
523   PetscScalar    *x,zero = 0.0;
524 
525   PetscFunctionBegin;
526   ierr = VecSet(v,zero);CHKERRQ(ierr);
527   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
528   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
529   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
530   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
531   radd = A->rmap->rstart*m;
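  /* The local block is stored column major with leading dimension m here, so the global
     diagonal entry (rstart+i,rstart+i) sits at offset rstart*m + i*m + i of aloc->v. */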
532   for (i=0; i<len; i++) {
533     x[i] = aloc->v[radd + i*m + i];
534   }
535   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatDestroy_MPIDense"
541 PetscErrorCode MatDestroy_MPIDense(Mat mat)
542 {
543   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
544   PetscErrorCode ierr;
545 #if defined(PETSC_HAVE_PLAPACK)
546   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
547 #endif
548 
549   PetscFunctionBegin;
550 
551 #if defined(PETSC_USE_LOG)
552   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
553 #endif
554   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
555   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
556   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
557   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
558 #if defined(PETSC_HAVE_PLAPACK)
559   if (lu) {
560     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
561     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
562     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
563 
564     if (lu->is_pla) {
565       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
566       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
567       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
568     }
569   }
570 #endif
571 
572   ierr = PetscFree(mdn);CHKERRQ(ierr);
573   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_MPIDense_Binary"
584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585 {
586   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
587   PetscErrorCode    ierr;
588   PetscViewerFormat format;
589   int               fd;
590   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
591   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
592   PetscScalar       *work,*v,*vv;
593   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
594   MPI_Status        status;
595 
596   PetscFunctionBegin;
597   if (mdn->size == 1) {
598     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
599   } else {
600     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
601     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
602     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
603 
604     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
605     if (format == PETSC_VIEWER_NATIVE) {
606 
607       if (!rank) {
608         /* store the matrix as a dense matrix */
609         header[0] = MAT_FILE_CLASSID;
610         header[1] = mat->rmap->N;
611         header[2] = N;
612         header[3] = MATRIX_BINARY_FORMAT_DENSE;
613         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
614 
615         /* get largest work array needed for transposing array */
616         mmax = mat->rmap->n;
617         for (i=1; i<size; i++) {
618           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
619         }
620         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
621 
622         /* write out local array, by rows */
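        /* The local block is stored column major; transpose it into the row-major work[]
           buffer so the local rows can be written to the file in global row order. */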
623         m    = mat->rmap->n;
624         v    = a->v;
625         for (j=0; j<N; j++) {
626           for (i=0; i<m; i++) {
627             work[j + i*N] = *v++;
628           }
629         }
630         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
631         /* get largest work array to receive messages from other processes, excludes process zero */
632         mmax = 0;
633         for (i=1; i<size; i++) {
634           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
635         }
636         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
637         for(k = 1; k < size; k++) {
638           v    = vv;
639           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
640           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
641 
642           for(j = 0; j < N; j++) {
643             for(i = 0; i < m; i++) {
644               work[j + i*N] = *v++;
645             }
646           }
647           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
648         }
649         ierr = PetscFree(work);CHKERRQ(ierr);
650         ierr = PetscFree(vv);CHKERRQ(ierr);
651       } else {
652         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
653       }
654     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
655   }
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNCT__
660 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
661 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
662 {
663   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
664   PetscErrorCode        ierr;
665   PetscMPIInt           size = mdn->size,rank = mdn->rank;
666   const PetscViewerType vtype;
667   PetscTruth            iascii,isdraw;
668   PetscViewer           sviewer;
669   PetscViewerFormat     format;
670 #if defined(PETSC_HAVE_PLAPACK)
671   Mat_Plapack           *lu;
672 #endif
673 
674   PetscFunctionBegin;
675   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
676   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
677   if (iascii) {
678     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
679     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
680     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
681       MatInfo info;
682       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
683       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
684                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
685       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
686 #if defined(PETSC_HAVE_PLAPACK)
687       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
688       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
689       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
690       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
691       if (mat->factortype){
692         lu=(Mat_Plapack*)(mat->spptr);
693         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
694       }
695 #else
696       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
697 #endif
698       PetscFunctionReturn(0);
699     } else if (format == PETSC_VIEWER_ASCII_INFO) {
700       PetscFunctionReturn(0);
701     }
702   } else if (isdraw) {
703     PetscDraw  draw;
704     PetscTruth isnull;
705 
706     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
707     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
708     if (isnull) PetscFunctionReturn(0);
709   }
710 
711   if (size == 1) {
712     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
713   } else {
714     /* assemble the entire matrix onto first processor. */
715     Mat         A;
716     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
717     PetscInt    *cols;
718     PetscScalar *vals;
719 
720     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
721     if (!rank) {
722       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
723     } else {
724       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
725     }
726     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
727     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
728     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
729     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
730 
731     /* Copy the matrix ... This isn't the most efficient means,
732        but it's quick for now */
733     A->insertmode = INSERT_VALUES;
734     row = mat->rmap->rstart; m = mdn->A->rmap->n;
735     for (i=0; i<m; i++) {
736       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
737       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
738       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
739       row++;
740     }
741 
742     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
743     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
744     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
745     if (!rank) {
746       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
747     }
748     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
749     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
750     ierr = MatDestroy(A);CHKERRQ(ierr);
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 #undef __FUNCT__
756 #define __FUNCT__ "MatView_MPIDense"
757 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
758 {
759   PetscErrorCode ierr;
760   PetscTruth     iascii,isbinary,isdraw,issocket;
761 
762   PetscFunctionBegin;
763 
764   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
765   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
766   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
767   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
768 
769   if (iascii || issocket || isdraw) {
770     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
771   } else if (isbinary) {
772     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
773   } else {
774     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
775   }
776   PetscFunctionReturn(0);
777 }
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatGetInfo_MPIDense"
781 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
782 {
783   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
784   Mat            mdn = mat->A;
785   PetscErrorCode ierr;
786   PetscReal      isend[5],irecv[5];
787 
788   PetscFunctionBegin;
789   info->block_size     = 1.0;
790   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
791   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
792   isend[3] = info->memory;  isend[4] = info->mallocs;
793   if (flag == MAT_LOCAL) {
794     info->nz_used      = isend[0];
795     info->nz_allocated = isend[1];
796     info->nz_unneeded  = isend[2];
797     info->memory       = isend[3];
798     info->mallocs      = isend[4];
799   } else if (flag == MAT_GLOBAL_MAX) {
800     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
801     info->nz_used      = irecv[0];
802     info->nz_allocated = irecv[1];
803     info->nz_unneeded  = irecv[2];
804     info->memory       = irecv[3];
805     info->mallocs      = irecv[4];
806   } else if (flag == MAT_GLOBAL_SUM) {
807     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
808     info->nz_used      = irecv[0];
809     info->nz_allocated = irecv[1];
810     info->nz_unneeded  = irecv[2];
811     info->memory       = irecv[3];
812     info->mallocs      = irecv[4];
813   }
814   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
815   info->fill_ratio_needed = 0;
816   info->factor_mallocs    = 0;
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatSetOption_MPIDense"
822 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
823 {
824   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
825   PetscErrorCode ierr;
826 
827   PetscFunctionBegin;
828   switch (op) {
829   case MAT_NEW_NONZERO_LOCATIONS:
830   case MAT_NEW_NONZERO_LOCATION_ERR:
831   case MAT_NEW_NONZERO_ALLOCATION_ERR:
832     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
833     break;
834   case MAT_ROW_ORIENTED:
835     a->roworiented = flg;
836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
837     break;
838   case MAT_NEW_DIAGONALS:
839   case MAT_USE_HASH_TABLE:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   case MAT_IGNORE_OFF_PROC_ENTRIES:
843     a->donotstash = flg;
844     break;
845   case MAT_SYMMETRIC:
846   case MAT_STRUCTURALLY_SYMMETRIC:
847   case MAT_HERMITIAN:
848   case MAT_SYMMETRY_ETERNAL:
849   case MAT_IGNORE_LOWER_TRIANGULAR:
850     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
851     break;
852   default:
853     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
854   }
855   PetscFunctionReturn(0);
856 }
857 
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatDiagonalScale_MPIDense"
861 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
862 {
863   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
864   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
865   PetscScalar    *l,*r,x,*v;
866   PetscErrorCode ierr;
867   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
868 
869   PetscFunctionBegin;
870   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
871   if (ll) {
872     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
873     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
874     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
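    /* Left scaling: row i of the column-major local block is the strided sequence
       mat->v[i], mat->v[i+m], ..., so multiply it in place by l[i]. */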
875     for (i=0; i<m; i++) {
876       x = l[i];
877       v = mat->v + i;
878       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
879     }
880     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
881     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
882   }
883   if (rr) {
884     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
885     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
886     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
889     for (i=0; i<n; i++) {
890       x = r[i];
891       v = mat->v + i*m;
892       for (j=0; j<m; j++) { (*v++) *= x;}
893     }
894     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
895     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
896   }
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatNorm_MPIDense"
902 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
903 {
904   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
905   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
906   PetscErrorCode ierr;
907   PetscInt       i,j;
908   PetscReal      sum = 0.0;
909   PetscScalar    *v = mat->v;
910 
911   PetscFunctionBegin;
912   if (mdn->size == 1) {
913     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
914   } else {
915     if (type == NORM_FROBENIUS) {
916       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
917 #if defined(PETSC_USE_COMPLEX)
918         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
919 #else
920         sum += (*v)*(*v); v++;
921 #endif
922       }
923       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
924       *nrm = sqrt(*nrm);
925       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
926     } else if (type == NORM_1) {
927       PetscReal *tmp,*tmp2;
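      /* NORM_1 is the maximum column sum: accumulate the local column sums in tmp[], reduce
         them with MPI_SUM into tmp2[], then take the maximum over all global columns. */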
928       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
929       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
930       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
931       *nrm = 0.0;
932       v = mat->v;
933       for (j=0; j<mdn->A->cmap->n; j++) {
934         for (i=0; i<mdn->A->rmap->n; i++) {
935           tmp[j] += PetscAbsScalar(*v);  v++;
936         }
937       }
938       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
939       for (j=0; j<A->cmap->N; j++) {
940         if (tmp2[j] > *nrm) *nrm = tmp2[j];
941       }
942       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
943       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
944     } else if (type == NORM_INFINITY) { /* max row norm */
945       PetscReal ntemp;
946       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
947       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
948     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
949   }
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatTranspose_MPIDense"
955 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
956 {
957   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
958   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
959   Mat            B;
960   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
961   PetscErrorCode ierr;
962   PetscInt       j,i;
963   PetscScalar    *v;
964 
965   PetscFunctionBegin;
966   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
967   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
968     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
969     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
970     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
971     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
972   } else {
973     B = *matout;
974   }
975 
976   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
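  /* The local block is column major, so each local column j (contiguous, length m, starting
     at v) supplies row j of the transpose B at global columns rstart..rstart+m-1. */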
977   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
978   for (i=0; i<m; i++) rwork[i] = rstart + i;
979   for (j=0; j<n; j++) {
980     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
981     v   += m;
982   }
983   ierr = PetscFree(rwork);CHKERRQ(ierr);
984   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
985   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
986   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
987     *matout = B;
988   } else {
989     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 
995 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
996 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
997 
998 #undef __FUNCT__
999 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1000 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1001 {
1002   PetscErrorCode ierr;
1003 
1004   PetscFunctionBegin;
1005   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #if defined(PETSC_HAVE_PLAPACK)
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1013 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1014 {
1015   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1016   PetscErrorCode ierr;
1017   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1018   PetscScalar    *array;
1019   PetscReal      one = 1.0;
1020 
1021   PetscFunctionBegin;
1022   /* Copy A into F->lu->A */
1023   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1024   ierr = PLA_API_begin();CHKERRQ(ierr);
1025   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1026   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1027   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1028   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1029   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1030   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1031   ierr = PLA_API_end();CHKERRQ(ierr);
1032   lu->rstart = rstart;
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1038 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1039 {
1040   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1041   PetscErrorCode ierr;
1042   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1043   PetscScalar    *array;
1044   PetscReal      one = 1.0;
1045 
1046   PetscFunctionBegin;
1047   /* Copy F into A->lu->A */
1048   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1049   ierr = PLA_API_begin();CHKERRQ(ierr);
1050   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1051   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1052   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1053   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1054   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1055   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1056   ierr = PLA_API_end();CHKERRQ(ierr);
1057   lu->rstart = rstart;
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1063 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1064 {
1065   PetscErrorCode ierr;
1066   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1067   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1068   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1069   PLA_Obj        alpha = NULL,beta = NULL;
1070 
1071   PetscFunctionBegin;
1072   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1073   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1074 
1075   /*
1076   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1077   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1078   */
1079 
1080   /* do the multiply in PLA  */
1081   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1082   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1083   CHKMEMQ;
1084 
1085   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1086   CHKMEMQ;
1087   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1088   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1089 
1090   /*
1091   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1092   */
1093   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1099 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1100 {
1101   PetscErrorCode ierr;
1102   PetscInt       m=A->rmap->n,n=B->cmap->n;
1103   Mat            Cmat;
1104 
1105   PetscFunctionBegin;
1106   if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
1107   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1108   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1109   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1110   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1111   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1112   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1113 
1114   *C = Cmat;
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1120 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1121 {
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   if (scall == MAT_INITIAL_MATRIX){
1126     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1127   }
1128   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatSolve_MPIDense"
1134 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1135 {
1136   MPI_Comm       comm = ((PetscObject)A)->comm;
1137   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1138   PetscErrorCode ierr;
1139   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1140   PetscScalar    *array;
1141   PetscReal      one = 1.0;
1142   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1143   PLA_Obj        v_pla = NULL;
1144   PetscScalar    *loc_buf;
1145   Vec            loc_x;
1146 
1147   PetscFunctionBegin;
1148   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1149   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1150 
1151   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1152   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1153   PLA_Obj_set_to_zero(v_pla);
1154 
1155   /* Copy b into rhs_pla */
1156   PLA_API_begin();
1157   PLA_Obj_API_open(v_pla);
1158   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1159   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1160   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1161   PLA_Obj_API_close(v_pla);
1162   PLA_API_end();
1163 
1164   if (A->factortype == MAT_FACTOR_LU){
1165     /* Apply the permutations to the right hand sides */
1166     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1167 
1168     /* Solve L y = b, overwriting b with y */
1169     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1170 
1171     /* Solve U x = y (=b), overwriting b with x */
1172     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1173   } else { /* MAT_FACTOR_CHOLESKY */
1174     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1175     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1176                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1177   }
1178 
1179   /* Copy PLAPACK x into Petsc vector x  */
1180   PLA_Obj_local_length(v_pla, &loc_m);
1181   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1182   PLA_Obj_local_stride(v_pla, &loc_stride);
1183   /*
1184     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1185   */
1186   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1187   if (!lu->pla_solved){
1188 
1189     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1190     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1191 
1192     /* Create the index sets and scatter context used to move the solution from the PLAPACK layout to the PETSc layout */
1193     PLA_Obj_local_length(v_pla, &loc_m);
1194     PLA_Obj_local_stride(v_pla, &loc_stride);
1195     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
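    /* Build the mapping from PLAPACK's block-cyclic vector layout (blocks of nb entries dealt
       out over the 2d process mesh) to PETSc's contiguous layout: idx_pla[] holds the local
       buffer offsets and idx_petsc[] the corresponding global PETSc indices. */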
1196 
1197     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1198     for (i=0; i<loc_m; i+=lu->nb){
1199       j = 0;
1200       while (j < lu->nb && i+j < loc_m){
1201         idx_petsc[i+j] = rstart + j; j++;
1202       }
1203       rstart += size*lu->nb;
1204     }
1205 
1206     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1207 
1208     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1209     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1210     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1211     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1212   }
1213   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1215 
1216   /* Free data */
1217   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1218   PLA_Obj_free(&v_pla);
1219 
1220   lu->pla_solved = PETSC_TRUE;
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1226 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1227 {
1228   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1229   PetscErrorCode ierr;
1230   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1231   PetscInt       info_pla=0;
1232   PetscScalar    *array,one = 1.0;
1233 
1234   PetscFunctionBegin;
1235   if (lu->mstruct == SAME_NONZERO_PATTERN){
1236     PLA_Obj_free(&lu->A);
1237     PLA_Obj_free (&lu->pivots);
1238   }
1239   /* Create PLAPACK matrix object */
1240   lu->A = NULL; lu->pivots = NULL;
1241   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1242   PLA_Obj_set_to_zero(lu->A);
1243   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1244 
1245   /* Copy A into lu->A */
1246   PLA_API_begin();
1247   PLA_Obj_API_open(lu->A);
1248   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1249   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1250   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1251   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1252   PLA_Obj_API_close(lu->A);
1253   PLA_API_end();
1254 
1255   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1256   info_pla = PLA_LU(lu->A,lu->pivots);
1257   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1258 
1259   lu->rstart         = rstart;
1260   lu->mstruct        = SAME_NONZERO_PATTERN;
1261   F->ops->solve      = MatSolve_MPIDense;
1262   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1268 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1269 {
1270   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1271   PetscErrorCode ierr;
1272   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1273   PetscInt       info_pla=0;
1274   PetscScalar    *array,one = 1.0;
1275 
1276   PetscFunctionBegin;
1277   if (lu->mstruct == SAME_NONZERO_PATTERN){
1278     PLA_Obj_free(&lu->A);
1279   }
1280   /* Create PLAPACK matrix object */
1281   lu->A      = NULL;
1282   lu->pivots = NULL;
1283   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1284 
1285   /* Copy A into lu->A */
1286   PLA_API_begin();
1287   PLA_Obj_API_open(lu->A);
1288   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1289   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1290   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1291   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1292   PLA_Obj_API_close(lu->A);
1293   PLA_API_end();
1294 
1295   /* Factor P A -> Chol */
1296   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1297   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1298 
1299   lu->rstart         = rstart;
1300   lu->mstruct        = SAME_NONZERO_PATTERN;
1301   F->ops->solve      = MatSolve_MPIDense;
1302   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 /* Note the Petsc perm permutation is ignored */
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1309 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1310 {
1311   PetscErrorCode ierr;
1312   PetscTruth     issymmetric,set;
1313 
1314   PetscFunctionBegin;
1315   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1316   if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1317   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /* Note the Petsc r and c permutations are ignored */
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1324 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1325 {
1326   PetscErrorCode ierr;
1327   PetscInt       M = A->rmap->N;
1328   Mat_Plapack    *lu;
1329 
1330   PetscFunctionBegin;
1331   lu = (Mat_Plapack*)F->spptr;
1332   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1333   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 EXTERN_C_BEGIN
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1340 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1341 {
1342   PetscFunctionBegin;
1343   *type = MAT_SOLVER_PLAPACK;
1344   PetscFunctionReturn(0);
1345 }
1346 EXTERN_C_END
1347 
1348 EXTERN_C_BEGIN
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1351 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1352 {
1353   PetscErrorCode ierr;
1354   Mat_Plapack    *lu;
1355   PetscMPIInt    size;
1356   PetscInt       M=A->rmap->N;
1357 
1358   PetscFunctionBegin;
1359   /* Create the factorization matrix */
1360   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1361   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1362   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1363   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1364   (*F)->spptr = (void*)lu;
1365 
1366   /* Set default Plapack parameters */
1367   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1368   lu->nb = M/size;
1369   if (M - lu->nb*size) lu->nb++; /* round nb up so that size*nb >= M (no cyclic wrap of the distribution) */
1370 
1371   /* Set runtime options */
1372   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1373     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1374   PetscOptionsEnd();
1375 
1376   /* Create object distribution template */
1377   lu->templ = NULL;
1378   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1379 
1380   /* Set the datatype */
1381 #if defined(PETSC_USE_COMPLEX)
1382   lu->datatype = MPI_DOUBLE_COMPLEX;
1383 #else
1384   lu->datatype = MPI_DOUBLE;
1385 #endif
1386 
1387   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1388 
1389 
1390   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1391   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1392 
1393   if (ftype == MAT_FACTOR_LU) {
1394     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1395   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1396     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1397   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1398   (*F)->factortype = ftype;
1399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 EXTERN_C_END
1403 #endif
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1407 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1408 {
1409 #if defined(PETSC_HAVE_PLAPACK)
1410   PetscErrorCode ierr;
1411 #endif
1412 
1413   PetscFunctionBegin;
1414 #if defined(PETSC_HAVE_PLAPACK)
1415   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1416 #else
1417   SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1418 #endif
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatAXPY_MPIDense"
1424 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1425 {
1426   PetscErrorCode ierr;
1427   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1428 
1429   PetscFunctionBegin;
1430   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatConjugate_MPIDense"
1436 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat)
1437 {
1438   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1439   PetscErrorCode ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatRealPart_MPIDense"
1448 PetscErrorCode MatRealPart_MPIDense(Mat A)
1449 {
1450   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1460 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1461 {
1462   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 /* -------------------------------------------------------------------*/
1471 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1472        MatGetRow_MPIDense,
1473        MatRestoreRow_MPIDense,
1474        MatMult_MPIDense,
1475 /* 4*/ MatMultAdd_MPIDense,
1476        MatMultTranspose_MPIDense,
1477        MatMultTransposeAdd_MPIDense,
1478        0,
1479        0,
1480        0,
1481 /*10*/ 0,
1482        0,
1483        0,
1484        0,
1485        MatTranspose_MPIDense,
1486 /*15*/ MatGetInfo_MPIDense,
1487        MatEqual_MPIDense,
1488        MatGetDiagonal_MPIDense,
1489        MatDiagonalScale_MPIDense,
1490        MatNorm_MPIDense,
1491 /*20*/ MatAssemblyBegin_MPIDense,
1492        MatAssemblyEnd_MPIDense,
1493        MatSetOption_MPIDense,
1494        MatZeroEntries_MPIDense,
1495 /*24*/ MatZeroRows_MPIDense,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*29*/ MatSetUpPreallocation_MPIDense,
1501        0,
1502        0,
1503        MatGetArray_MPIDense,
1504        MatRestoreArray_MPIDense,
1505 /*34*/ MatDuplicate_MPIDense,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*39*/ MatAXPY_MPIDense,
1511        MatGetSubMatrices_MPIDense,
1512        0,
1513        MatGetValues_MPIDense,
1514        0,
1515 /*44*/ 0,
1516        MatScale_MPIDense,
1517        0,
1518        0,
1519        0,
1520 /*49*/ 0,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*54*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*59*/ MatGetSubMatrix_MPIDense,
1531        MatDestroy_MPIDense,
1532        MatView_MPIDense,
1533        0,
1534        0,
1535 /*64*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*69*/ 0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*74*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*79*/ 0,
1551        0,
1552        0,
1553        0,
1554 /*83*/ MatLoad_MPIDense,
1555        0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*89*/
1561 #if defined(PETSC_HAVE_PLAPACK)
1562        MatMatMult_MPIDense_MPIDense,
1563        MatMatMultSymbolic_MPIDense_MPIDense,
1564        MatMatMultNumeric_MPIDense_MPIDense,
1565 #else
1566        0,
1567        0,
1568        0,
1569 #endif
1570        0,
1571        0,
1572 /*94*/ 0,
1573        0,
1574        0,
1575        0,
1576        0,
1577 /*99*/ 0,
1578        0,
1579        0,
1580        MatConjugate_MPIDense,
1581        0,
1582 /*104*/0,
1583        MatRealPart_MPIDense,
1584        MatImaginaryPart_MPIDense,
1585        0
1586 };
1587 
1588 EXTERN_C_BEGIN
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1591 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1592 {
1593   Mat_MPIDense   *a;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   mat->preallocated = PETSC_TRUE;
1598   /* Note: for now, when data is specified, this assumes the user correctly
1599    allocates the local dense storage space.  We should add error checking. */
1600 
1601   a    = (Mat_MPIDense*)mat->data;
1602   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1603   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1604   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1605   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1606   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 EXTERN_C_END
1610 
1611 /*MC
1612    MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1613 
1614   Run ./configure with the option --download-plapack to install PETSc with this solver.
1615 
1616 
1617   Options Database Keys:
1618 + -mat_plapack_nprows <n> - number of rows in processor partition
1619 . -mat_plapack_npcols <n> - number of columns in processor partition
1620 . -mat_plapack_nb <n> - block size of template vector
1621 . -mat_plapack_nb_alg <n> - algorithmic block size
1622 - -mat_plapack_ckerror <n> - error checking flag
1623 
1624    Level: intermediate
1625 
1626 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1627 
1628 M*/
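
/*
   Usage sketch (illustrative only, not compiled here): assuming A is an assembled
   MATMPIDENSE matrix and b, x are compatible vectors, an LU solve through the
   PLAPACK interface could look like

      Mat           F;
      IS            r,c;
      MatFactorInfo info;
      ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
      ierr = MatGetOrdering(A,MATORDERING_NATURAL,&r,&c);CHKERRQ(ierr);
      ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
      ierr = MatLUFactorSymbolic(F,A,r,c,&info);CHKERRQ(ierr);
      ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
      ierr = MatSolve(F,b,x);CHKERRQ(ierr);
      ierr = ISDestroy(r);CHKERRQ(ierr);
      ierr = ISDestroy(c);CHKERRQ(ierr);
      ierr = MatDestroy(F);CHKERRQ(ierr);

   The r and c orderings are ignored by this implementation (see the note above
   MatLUFactorSymbolic_MPIDense()), so the natural ordering suffices.
*/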
1629 
1630 EXTERN_C_BEGIN
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatCreate_MPIDense"
1633 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1634 {
1635   Mat_MPIDense   *a;
1636   PetscErrorCode ierr;
1637 
1638   PetscFunctionBegin;
1639   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1640   mat->data         = (void*)a;
1641   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1642   mat->mapping      = 0;
1643 
1644   mat->insertmode = NOT_SET_VALUES;
1645   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1646   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1647 
1648   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1649   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1650   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1651   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1652   a->nvec = mat->cmap->n;
1653 
1654   /* build cache for off array entries formed */
1655   a->donotstash = PETSC_FALSE;
1656   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1657 
1658   /* stuff used for matrix vector multiply */
1659   a->lvec        = 0;
1660   a->Mvctx       = 0;
1661   a->roworiented = PETSC_TRUE;
1662 
1663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1664                                      "MatGetDiagonalBlock_MPIDense",
1665                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1667                                      "MatMPIDenseSetPreallocation_MPIDense",
1668                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1670                                      "MatMatMult_MPIAIJ_MPIDense",
1671                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1673                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1674                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1676                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1677                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1679                                      "MatGetFactor_mpidense_petsc",
1680                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1681 #if defined(PETSC_HAVE_PLAPACK)
1682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1683                                      "MatGetFactor_mpidense_plapack",
1684                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1685   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1686 #endif
1687   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1688 
1689   PetscFunctionReturn(0);
1690 }
1691 EXTERN_C_END
1692 
1693 /*MC
1694    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1695 
1696    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1697    and MATMPIDENSE otherwise.
1698 
1699    Options Database Keys:
1700 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1701 
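   A typical way to create a matrix of this type (a minimal sketch; comm, m, n, M, and N
   are assumed to be supplied by the caller):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
.ve
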
1702   Level: beginner
1703 
1704 
1705 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1706 M*/
1707 
1708 EXTERN_C_BEGIN
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatCreate_Dense"
1711 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1712 {
1713   PetscErrorCode ierr;
1714   PetscMPIInt    size;
1715 
1716   PetscFunctionBegin;
1717   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1718   if (size == 1) {
1719     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1720   } else {
1721     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1722   }
1723   PetscFunctionReturn(0);
1724 }
1725 EXTERN_C_END
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1729 /*@C
1730    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1731 
1732    Not collective
1733 
1734    Input Parameters:
1735 +  A - the matrix
1736 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1737    to control all matrix memory allocation.
1738 
1739    Notes:
1740    The dense format is fully compatible with standard Fortran 77
1741    storage by columns.
1742 
1743    The data input variable is intended primarily for Fortran programmers
1744    who wish to allocate their own matrix memory space.  Most users should
1745    set data=PETSC_NULL.
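
   For example, letting PETSc manage the storage (a minimal sketch; mat is assumed to
   already be a MATMPIDENSE matrix with its sizes set):
.vb
   ierr = MatMPIDenseSetPreallocation(mat,PETSC_NULL);CHKERRQ(ierr);
.ve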
1746 
1747    Level: intermediate
1748 
1749 .keywords: matrix,dense, parallel
1750 
1751 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1752 @*/
1753 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1754 {
1755   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1756 
1757   PetscFunctionBegin;
1758   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1759   if (f) {
1760     ierr = (*f)(mat,data);CHKERRQ(ierr);
1761   }
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatCreateMPIDense"
1767 /*@C
1768    MatCreateMPIDense - Creates a parallel matrix in dense format.
1769 
1770    Collective on MPI_Comm
1771 
1772    Input Parameters:
1773 +  comm - MPI communicator
1774 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1775 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1776 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1777 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1778 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1779    to control all matrix memory allocation.
1780 
1781    Output Parameter:
1782 .  A - the matrix
1783 
1784    Notes:
1785    The dense format is fully compatible with standard Fortran 77
1786    storage by columns.
1787 
1788    The data input variable is intended primarily for Fortran programmers
1789    who wish to allocate their own matrix memory space.  Most users should
1790    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1791 
1792    The user MUST specify either the local or global matrix dimensions
1793    (possibly both).
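
   Example usage (a minimal sketch; M and N are assumed to hold the desired global sizes):
.vb
   Mat A;
   ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... insert entries with MatSetValues() ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve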
1794 
1795    Level: intermediate
1796 
1797 .keywords: matrix,dense, parallel
1798 
1799 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1800 @*/
1801 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1802 {
1803   PetscErrorCode ierr;
1804   PetscMPIInt    size;
1805 
1806   PetscFunctionBegin;
1807   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1808   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1810   if (size > 1) {
1811     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1812     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1813   } else {
1814     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1815     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1816   }
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatDuplicate_MPIDense"
1822 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1823 {
1824   Mat            mat;
1825   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1826   PetscErrorCode ierr;
1827 
1828   PetscFunctionBegin;
1829   *newmat       = 0;
1830   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1831   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1832   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1833   a                 = (Mat_MPIDense*)mat->data;
1834   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1835   mat->factortype   = A->factortype;
1836   mat->assembled    = PETSC_TRUE;
1837   mat->preallocated = PETSC_TRUE;
1838 
1839   mat->rmap->rstart     = A->rmap->rstart;
1840   mat->rmap->rend       = A->rmap->rend;
1841   a->size              = oldmat->size;
1842   a->rank              = oldmat->rank;
1843   mat->insertmode      = NOT_SET_VALUES;
1844   a->nvec              = oldmat->nvec;
1845   a->donotstash        = oldmat->donotstash;
1846 
1847   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1848   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1849 
1850   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1851   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1852   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1853 
1854   *newmat = mat;
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1860 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1861 {
1862   PetscErrorCode ierr;
1863   PetscMPIInt    rank,size;
1864   PetscInt       *rowners,i,m,nz,j;
1865   PetscScalar    *array,*vals,*vals_ptr;
1866   MPI_Status     status;
1867 
1868   PetscFunctionBegin;
1869   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1871 
1872   /* determine ownership of all rows */
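  /* each process gets M/size rows; the first M%size processes get one extra row */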
1873   m          = M/size + ((M % size) > rank);
1874   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1875   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1876   rowners[0] = 0;
1877   for (i=2; i<=size; i++) {
1878     rowners[i] += rowners[i-1];
1879   }
1880 
1881   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1882   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1883   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1884   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1885   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1886 
1887   if (!rank) {
1888     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1889 
1890     /* read in my part of the matrix numerical values  */
1891     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1892 
1893     /* insert into the matrix row by row (the local array is stored column-major, which is why we cannot read the file directly into it) */
1894     vals_ptr = vals;
1895     for (i=0; i<m; i++) {
1896       for (j=0; j<N; j++) {
1897         array[i + j*m] = *vals_ptr++;
1898       }
1899     }
1900 
1901     /* read in the other processes' parts and ship them out */
1902     for (i=1; i<size; i++) {
1903       nz   = (rowners[i+1] - rowners[i])*N;
1904       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1905       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1906     }
1907   } else {
1908     /* receive numeric values */
1909     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1910 
1911     /* receive message of values */
1912     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1913 
1914     /* insert into the matrix row by row (the local array is stored column-major, which is why we cannot read the values directly into it) */
1915     vals_ptr = vals;
1916     for (i=0; i<m; i++) {
1917       for (j=0; j<N; j++) {
1918         array[i + j*m] = *vals_ptr++;
1919       }
1920     }
1921   }
1922   ierr = PetscFree(rowners);CHKERRQ(ierr);
1923   ierr = PetscFree(vals);CHKERRQ(ierr);
1924   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1925   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatLoad_MPIDense"
1931 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1932 {
1933   Mat            A;
1934   PetscScalar    *vals,*svals;
1935   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1936   MPI_Status     status;
1937   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1938   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1939   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1940   PetscInt       i,nz,j,rstart,rend;
1941   int            fd;
1942   PetscErrorCode ierr;
1943 
1944   PetscFunctionBegin;
1945   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1946   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1947   if (!rank) {
1948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1949     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1950     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1951   }
1952 
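  /* binary header layout: [0] class id, [1] global rows M, [2] global columns N, [3] nonzero count (or MATRIX_BINARY_FORMAT_DENSE when values are stored densely) */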
1953   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1954   M = header[1]; N = header[2]; nz = header[3];
1955 
1956   /*
1957        Handle case where matrix is stored on disk as a dense matrix
1958   */
1959   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1960     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1961     PetscFunctionReturn(0);
1962   }
1963 
1964   /* determine ownership of all rows */
1965   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1966   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1967   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1968   rowners[0] = 0;
1969   for (i=2; i<=size; i++) {
1970     rowners[i] += rowners[i-1];
1971   }
1972   rstart = rowners[rank];
1973   rend   = rowners[rank+1];
1974 
1975   /* distribute row lengths to all processors */
1976   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1977   if (!rank) {
1978     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1979     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1980     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1981     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1982     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1983     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1984   } else {
1985     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1986   }
1987 
1988   if (!rank) {
1989     /* calculate the number of nonzeros on each processor */
1990     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1991     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1992     for (i=0; i<size; i++) {
1993       for (j=rowners[i]; j< rowners[i+1]; j++) {
1994         procsnz[i] += rowlengths[j];
1995       }
1996     }
1997     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1998 
1999     /* determine max buffer needed and allocate it */
2000     maxnz = 0;
2001     for (i=0; i<size; i++) {
2002       maxnz = PetscMax(maxnz,procsnz[i]);
2003     }
2004     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2005 
2006     /* read in my part of the matrix column indices  */
2007     nz = procsnz[0];
2008     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2009     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2010 
2011     /* read in everyone else's parts and ship them off */
2012     for (i=1; i<size; i++) {
2013       nz   = procsnz[i];
2014       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2015       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2016     }
2017     ierr = PetscFree(cols);CHKERRQ(ierr);
2018   } else {
2019     /* determine buffer space needed for message */
2020     nz = 0;
2021     for (i=0; i<m; i++) {
2022       nz += ourlens[i];
2023     }
2024     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2025 
2026     /* receive message of column indices */
2027     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2028     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2029     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2030   }
2031 
2032   /* loop over local rows, determining number of off diagonal entries */
2033   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2034   jj = 0;
2035   for (i=0; i<m; i++) {
2036     for (j=0; j<ourlens[i]; j++) {
2037       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2038       jj++;
2039     }
2040   }
2041 
2042   /* create our matrix */
2043   for (i=0; i<m; i++) {
2044     ourlens[i] -= offlens[i];
2045   }
2046   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2047   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2048   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2049   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2050   A = *newmat;
2051   for (i=0; i<m; i++) {
2052     ourlens[i] += offlens[i];
2053   }
2054 
2055   if (!rank) {
2056     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2057 
2058     /* read in my part of the matrix numerical values  */
2059     nz = procsnz[0];
2060     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2061 
2062     /* insert into matrix */
2063     jj      = rstart;
2064     smycols = mycols;
2065     svals   = vals;
2066     for (i=0; i<m; i++) {
2067       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2068       smycols += ourlens[i];
2069       svals   += ourlens[i];
2070       jj++;
2071     }
2072 
2073     /* read in the other processes' parts and ship them out */
2074     for (i=1; i<size; i++) {
2075       nz   = procsnz[i];
2076       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2077       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2078     }
2079     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2080   } else {
2081     /* receive numeric values */
2082     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2083 
2084     /* receive message of values*/
2085     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2086     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2087     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2088 
2089     /* insert into matrix */
2090     jj      = rstart;
2091     smycols = mycols;
2092     svals   = vals;
2093     for (i=0; i<m; i++) {
2094       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2095       smycols += ourlens[i];
2096       svals   += ourlens[i];
2097       jj++;
2098     }
2099   }
2100   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2101   ierr = PetscFree(vals);CHKERRQ(ierr);
2102   ierr = PetscFree(mycols);CHKERRQ(ierr);
2103   ierr = PetscFree(rowners);CHKERRQ(ierr);
2104 
2105   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2106   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatEqual_MPIDense"
2112 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2113 {
2114   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2115   Mat            a,b;
2116   PetscTruth     flg;
2117   PetscErrorCode ierr;
2118 
2119   PetscFunctionBegin;
2120   a = matA->A;
2121   b = matB->A;
2122   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2123   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 #if defined(PETSC_HAVE_PLAPACK)
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2131 /*@C
2132   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2133   Level: developer
2134 
2135 .keywords: Petsc, destroy, package, PLAPACK
2136 .seealso: PetscFinalize()
2137 @*/
2138 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2139 {
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   ierr = PLA_Finalize();CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2149 /*@C
2150   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2151   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2152 
2153   Input Parameter:
2154 .   comm - the communicator the matrix lives on
2155 
2156   Level: developer
2157 
2158    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2159   same communicator (because there is global state that is initialized once and used for all matrices). In addition, if
2160   the matrices that trigger PLAPACK initialization live on subcommunicators of MPI_COMM_WORLD, those subcommunicators
2161   must not overlap.
2162 
2163 .keywords: Petsc, initialize, package, PLAPACK
2164 .seealso: PetscInitializePackage(), PetscInitialize()
2165 @*/
2166 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2167 {
2168   PetscMPIInt    size;
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   if (!PLA_Initialized(PETSC_NULL)) {
2173 
2174     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2175     Plapack_nprows = 1;
2176     Plapack_npcols = size;
2177 
2178     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2179       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2180       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2181 #if defined(PETSC_USE_DEBUG)
2182       Plapack_ierror = 3;
2183 #else
2184       Plapack_ierror = 0;
2185 #endif
2186       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2187       if (Plapack_ierror) {
2188         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2189       } else {
2190         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2191       }
2192 
2193       Plapack_nb_alg = 0;
2194       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2195       if (Plapack_nb_alg) {
2196         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2197       }
2198     PetscOptionsEnd();
2199 
2200     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2201     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2202     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2203   }
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #endif
2208