xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision acfcf0e5ba359b58e6a8a7af3f239cd7334278e8)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel (MPI) dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 EXTERN_C_BEGIN
13 #include "PLA.h"
14 EXTERN_C_END
15 
16 typedef struct {
17   PLA_Obj        A,pivots;
18   PLA_Template   templ;
19   MPI_Datatype   datatype;
20   PetscInt       nb,rstart;
21   VecScatter     ctx;
22   IS             is_pla,is_petsc;
23   PetscBool      pla_solved;
24   MatStructure   mstruct;
25 } Mat_Plapack;
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatDenseGetLocalMatrix"
30 /*@
31 
32       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix returns the sequential matrix that
33               stores the local (on-process) part of the operator; for a MATSEQDENSE matrix it returns the matrix itself.
34 
35     Input Parameter:
36 .      A - the Seq or MPI dense matrix
37 
38     Output Parameter:
39 .      B - the inner sequential (local) matrix
40 
41     Level: intermediate
42 
43 @*/
44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45 {
46   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
47   PetscErrorCode ierr;
48   PetscBool      flg;
49 
50   PetscFunctionBegin;
51   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
52   if (flg) {
53     *B = mat->A;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
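/*
   A minimal usage sketch, assuming an already created MPI dense matrix; the sizes M and N
   and the MatCreateMPIDense() call are illustrative only.  MatDenseGetLocalMatrix() gives
   access to the per-process MATSEQDENSE block without copying:

      Mat         A,Aloc;
      PetscScalar *a;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetArray(Aloc,&a);CHKERRQ(ierr);          [fill the local column-major block through a]
      ierr = MatRestoreArray(Aloc,&a);CHKERRQ(ierr);
*/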
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatGetRow_MPIDense"
62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63 {
64   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
65   PetscErrorCode ierr;
66   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
67 
68   PetscFunctionBegin;
69   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
70   lrow = row - rstart;
71   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatRestoreRow_MPIDense"
77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
83   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
84   PetscFunctionReturn(0);
85 }
86 
87 EXTERN_C_BEGIN
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *B)
91 {
92   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
93   PetscErrorCode ierr;
94   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
95   PetscScalar    *array;
96   MPI_Comm       comm;
97 
98   PetscFunctionBegin;
99   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
100 
101   /* The reuse aspect is not implemented efficiently */
102   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
103 
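  /* The local block of the MPI dense matrix is m x N, stored column-major with leading
     dimension m, so the m x m diagonal block (columns rstart .. rstart+m-1) begins at
     array + m*rstart and is contiguous; it can therefore be wrapped without copying. */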
104   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
105   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
106   ierr = MatCreate(comm,B);CHKERRQ(ierr);
107   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
108   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
109   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
110   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
111   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
113 
114   *iscopy = PETSC_TRUE;
115   PetscFunctionReturn(0);
116 }
117 EXTERN_C_END
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatSetValues_MPIDense"
121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122 {
123   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
124   PetscErrorCode ierr;
125   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126   PetscBool      roworiented = A->roworiented;
127 
128   PetscFunctionBegin;
129   if (v) PetscValidScalarPointer(v,6);
130   for (i=0; i<m; i++) {
131     if (idxm[i] < 0) continue;
132     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
133     if (idxm[i] >= rstart && idxm[i] < rend) {
134       row = idxm[i] - rstart;
135       if (roworiented) {
136         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
137       } else {
138         for (j=0; j<n; j++) {
139           if (idxn[j] < 0) continue;
140           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
141           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
142         }
143       }
144     } else {
145       if (!A->donotstash) {
146         if (roworiented) {
147           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
148         } else {
149           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
150         }
151       }
152     }
153   }
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetValues_MPIDense"
159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160 {
161   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
164 
165   PetscFunctionBegin;
166   for (i=0; i<m; i++) {
167     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169     if (idxm[i] >= rstart && idxm[i] < rend) {
170       row = idxm[i] - rstart;
171       for (j=0; j<n; j++) {
172         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
174         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
175       }
176     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
177   }
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatGetArray_MPIDense"
183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
184 {
185   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
186   PetscErrorCode ierr;
187 
188   PetscFunctionBegin;
189   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
190   PetscFunctionReturn(0);
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
196 {
197   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
198   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
199   PetscErrorCode ierr;
200   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
201   const PetscInt *irow,*icol;
202   PetscScalar    *av,*bv,*v = lmat->v;
203   Mat            newmat;
204   IS             iscol_local;
205 
206   PetscFunctionBegin;
207   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
208   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
209   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
210   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
211   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
212   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
213 
214   /* No parallel redistribution is currently supported! We should really check each index set
215      to confirm that it is OK.  ... Currently only a submatrix with the same row partitioning as
216      the original matrix is supported (see the usage sketch after this routine). */
217 
218   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
219   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
220 
221   /* Check submatrix call */
222   if (scall == MAT_REUSE_MATRIX) {
223     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
224     /* Really need to test the row and column sizes! */
225     newmat = *B;
226   } else {
227     /* Create and fill new matrix */
228     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
229     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
230     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
231     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
232   }
233 
234   /* Now extract the data pointers and do the copy, column at a time */
235   newmatd = (Mat_MPIDense*)newmat->data;
236   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
237 
238   for (i=0; i<Ncols; i++) {
239     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
240     for (j=0; j<nrows; j++) {
241       *bv++ = av[irow[j] - rstart];
242     }
243   }
244 
245   /* Assemble the matrices so that the correct flags are set */
246   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248 
249   /* Free work space */
250   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
251   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
      ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
252   *B = newmat;
253   PetscFunctionReturn(0);
254 }
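/*
   Usage sketch for the routine above (A is a hypothetical assembled MATMPIDENSE matrix).
   Because no redistribution is performed, isrow must follow the row partitioning of A;
   here every row and every locally owned column is selected, i.e. a plain copy, purely to
   illustrate the expected index-set layout:

      PetscInt rstart,rend,cstart,cend;
      IS       isrow,iscol;
      Mat      S;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(((PetscObject)A)->comm,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/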
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatRestoreArray_MPIDense"
258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
259 {
260   PetscFunctionBegin;
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
267 {
268   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
269   MPI_Comm       comm = ((PetscObject)mat)->comm;
270   PetscErrorCode ierr;
271   PetscInt       nstash,reallocs;
272   InsertMode     addv;
273 
274   PetscFunctionBegin;
275   /* make sure all processes use the same InsertMode (all INSERT_VALUES or all ADD_VALUES) */
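  /* Each process contributes its InsertMode to a bitwise OR (MPI_BOR): if every process used
     the same mode the reduction simply returns that mode, while a mixture of INSERT_VALUES and
     ADD_VALUES produces the value ADD_VALUES|INSERT_VALUES, which is detected and rejected below. */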
276   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
277   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
278   mat->insertmode = addv; /* in case this processor had no cache */
279 
280   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
281   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
282   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
289 {
290   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
291   PetscErrorCode  ierr;
292   PetscInt        i,*row,*col,flg,j,rstart,ncols;
293   PetscMPIInt     n;
294   PetscScalar     *val;
295   InsertMode      addv=mat->insertmode;
296 
297   PetscFunctionBegin;
298   /*  wait on receives */
299   while (1) {
300     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
301     if (!flg) break;
302 
303     for (i=0; i<n;) {
304       /* Now identify the consecutive vals belonging to the same row */
305       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
306       if (j < n) ncols = j-i;
307       else       ncols = n-i;
308       /* Now assemble all these values with a single function call */
309       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
310       i = j;
311     }
312   }
313   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
314 
315   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
316   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
317 
318   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
319     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatZeroEntries_MPIDense"
326 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
327 {
328   PetscErrorCode ierr;
329   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
330 
331   PetscFunctionBegin;
332   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 /* the code does not do the diagonal entries correctly unless the
337    matrix is square and the column and row ownerships are identical.
338    This is a BUG. The only way to fix it seems to be to access
339    the local matrix mdn->A directly and not through the MatZeroRows()
340    routine.
341 */
342 #undef __FUNCT__
343 #define __FUNCT__ "MatZeroRows_MPIDense"
344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
345 {
346   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
347   PetscErrorCode ierr;
348   PetscInt       i,*owners = A->rmap->range;
349   PetscInt       *nprocs,j,idx,nsends;
350   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
351   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
352   PetscInt       *lens,*lrows,*values;
353   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
354   MPI_Comm       comm = ((PetscObject)A)->comm;
355   MPI_Request    *send_waits,*recv_waits;
356   MPI_Status     recv_status,*send_status;
357   PetscBool      found;
358 
359   PetscFunctionBegin;
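  /* Overall flow: determine which process owns each row to be zeroed, tell every process how
     many indices to expect (PetscMaxSum), exchange the row indices with nonblocking sends and
     receives, convert the received global indices to local row numbers, and finally call
     MatZeroRows() on the local sequential block. */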
360   /*  first count number of contributors to each processor */
361   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
362   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
363   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
364   for (i=0; i<N; i++) {
365     idx = rows[i];
366     found = PETSC_FALSE;
367     for (j=0; j<size; j++) {
368       if (idx >= owners[j] && idx < owners[j+1]) {
369         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
370       }
371     }
372     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
373   }
374   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
375 
376   /* inform other processors of number of messages and max length*/
377   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
378 
379   /* post receives:   */
380   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
381   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
382   for (i=0; i<nrecvs; i++) {
383     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
384   }
385 
386   /* do sends:
387       1) starts[i] gives the starting index in svalues for the values going to
388          the ith processor
389   */
390   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
391   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
392   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
393   starts[0]  = 0;
394   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
395   for (i=0; i<N; i++) {
396     svalues[starts[owner[i]]++] = rows[i];
397   }
398 
399   starts[0] = 0;
400   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
401   count = 0;
402   for (i=0; i<size; i++) {
403     if (nprocs[2*i+1]) {
404       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
405     }
406   }
407   ierr = PetscFree(starts);CHKERRQ(ierr);
408 
409   base = owners[rank];
410 
411   /*  wait on receives */
412   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
413   count  = nrecvs;
414   slen   = 0;
415   while (count) {
416     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
417     /* unpack receives into our local space */
418     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
419     source[imdex]  = recv_status.MPI_SOURCE;
420     lens[imdex]    = n;
421     slen += n;
422     count--;
423   }
424   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
425 
426   /* convert the received global row indices into local row numbers */
427   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
428   count = 0;
429   for (i=0; i<nrecvs; i++) {
430     values = rvalues + i*nmax;
431     for (j=0; j<lens[i]; j++) {
432       lrows[count++] = values[j] - base;
433     }
434   }
435   ierr = PetscFree(rvalues);CHKERRQ(ierr);
436   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
437   ierr = PetscFree(owner);CHKERRQ(ierr);
438   ierr = PetscFree(nprocs);CHKERRQ(ierr);
439 
440   /* actually zap the local rows */
441   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
442   ierr = PetscFree(lrows);CHKERRQ(ierr);
443 
444   /* wait on sends */
445   if (nsends) {
446     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
447     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
448     ierr = PetscFree(send_status);CHKERRQ(ierr);
449   }
450   ierr = PetscFree(send_waits);CHKERRQ(ierr);
451   ierr = PetscFree(svalues);CHKERRQ(ierr);
452 
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatMult_MPIDense"
458 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
459 {
460   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
465   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
467   PetscFunctionReturn(0);
468 }
469 
470 #undef __FUNCT__
471 #define __FUNCT__ "MatMultAdd_MPIDense"
472 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
473 {
474   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
480   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatMultTranspose_MPIDense"
486 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
487 {
488   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
489   PetscErrorCode ierr;
490   PetscScalar    zero = 0.0;
491 
492   PetscFunctionBegin;
493   ierr = VecSet(yy,zero);CHKERRQ(ierr);
494   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
495   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
496   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
502 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
503 {
504   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
505   PetscErrorCode ierr;
506 
507   PetscFunctionBegin;
508   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
509   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
510   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
511   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
512   PetscFunctionReturn(0);
513 }
514 
515 #undef __FUNCT__
516 #define __FUNCT__ "MatGetDiagonal_MPIDense"
517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
518 {
519   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
520   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
521   PetscErrorCode ierr;
522   PetscInt       len,i,n,m = A->rmap->n,radd;
523   PetscScalar    *x,zero = 0.0;
524 
525   PetscFunctionBegin;
526   ierr = VecSet(v,zero);CHKERRQ(ierr);
527   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
528   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
529   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
530   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
531   radd = A->rmap->rstart*m;
532   for (i=0; i<len; i++) {
533     x[i] = aloc->v[radd + i*m + i];
534   }
535   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatDestroy_MPIDense"
541 PetscErrorCode MatDestroy_MPIDense(Mat mat)
542 {
543   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
544   PetscErrorCode ierr;
545 #if defined(PETSC_HAVE_PLAPACK)
546   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
547 #endif
548 
549   PetscFunctionBegin;
550 
551 #if defined(PETSC_USE_LOG)
552   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
553 #endif
554   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
555   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
556   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
557   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
558 #if defined(PETSC_HAVE_PLAPACK)
559   if (lu) {
560     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
561     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
562     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
563 
564     if (lu->is_pla) {
565       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
566       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
567       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
568     }
569   }
570 #endif
571 
572   ierr = PetscFree(mdn);CHKERRQ(ierr);
573   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
574   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
576   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
577   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
578   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_MPIDense_Binary"
584 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
585 {
586   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
587   PetscErrorCode    ierr;
588   PetscViewerFormat format;
589   int               fd;
590   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
591   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
592   PetscScalar       *work,*v,*vv;
593   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
594   MPI_Status        status;
595 
596   PetscFunctionBegin;
597   if (mdn->size == 1) {
598     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
599   } else {
600     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
601     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
602     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
603 
604     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
605     if (format == PETSC_VIEWER_NATIVE) {
606 
607       if (!rank) {
608         /* store the matrix as a dense matrix */
609         header[0] = MAT_FILE_CLASSID;
610         header[1] = mat->rmap->N;
611         header[2] = N;
612         header[3] = MATRIX_BINARY_FORMAT_DENSE;
613         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
614 
615         /* get largest work array needed for transposing array */
616         mmax = mat->rmap->n;
617         for (i=1; i<size; i++) {
618           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
619         }
620         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
621 
622         /* write out local array, by rows */
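        /* the local block a->v is stored column-major; work[] receives it transposed into
           row-major order (N entries per row) so that complete rows can be written contiguously */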
623         m    = mat->rmap->n;
624         v    = a->v;
625         for (j=0; j<N; j++) {
626           for (i=0; i<m; i++) {
627             work[j + i*N] = *v++;
628           }
629         }
630         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
631         /* get largest work array to receive messages from other processes, excludes process zero */
632         mmax = 0;
633         for (i=1; i<size; i++) {
634           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
635         }
636         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
637         for(k = 1; k < size; k++) {
638           v    = vv;
639           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
640           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
641 
642           for(j = 0; j < N; j++) {
643             for(i = 0; i < m; i++) {
644               work[j + i*N] = *v++;
645             }
646           }
647           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
648         }
649         ierr = PetscFree(work);CHKERRQ(ierr);
650         ierr = PetscFree(vv);CHKERRQ(ierr);
651       } else {
652         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
653       }
654     } else {
655       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 #undef __FUNCT__
662 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
663 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
664 {
665   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
666   PetscErrorCode        ierr;
667   PetscMPIInt           size = mdn->size,rank = mdn->rank;
668   const PetscViewerType vtype;
669   PetscBool             iascii,isdraw;
670   PetscViewer           sviewer;
671   PetscViewerFormat     format;
672 #if defined(PETSC_HAVE_PLAPACK)
673   Mat_Plapack           *lu;
674 #endif
675 
676   PetscFunctionBegin;
677   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
678   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
679   if (iascii) {
680     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
681     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
682     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
683       MatInfo info;
684       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
685       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
686                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
687       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
688 #if defined(PETSC_HAVE_PLAPACK)
689       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
690       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
691       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
692       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
693       if (mat->factortype){
694         lu=(Mat_Plapack*)(mat->spptr);
695         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
696       }
697 #else
698       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
699 #endif
700       PetscFunctionReturn(0);
701     } else if (format == PETSC_VIEWER_ASCII_INFO) {
702       PetscFunctionReturn(0);
703     }
704   } else if (isdraw) {
705     PetscDraw  draw;
706     PetscBool  isnull;
707 
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
710     if (isnull) PetscFunctionReturn(0);
711   }
712 
713   if (size == 1) {
714     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto first processor. */
717     Mat         A;
718     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
719     PetscInt    *cols;
720     PetscScalar *vals;
721 
722     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
723     if (!rank) {
724       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
725     } else {
726       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
727     }
728     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
729     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
730     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
731     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
732 
733     /* Copy the matrix ... This isn't the most efficient means,
734        but it's quick for now */
735     A->insertmode = INSERT_VALUES;
736     row = mat->rmap->rstart; m = mdn->A->rmap->n;
737     for (i=0; i<m; i++) {
738       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
739       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
740       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
741       row++;
742     }
743 
744     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
746     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
747     if (!rank) {
748       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
749       /* Set the type name to MATMPIDENSE so that the correct type is printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
750       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
751       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
752     }
753     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
754     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
755     ierr = MatDestroy(A);CHKERRQ(ierr);
756   }
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatView_MPIDense"
762 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
763 {
764   PetscErrorCode ierr;
765   PetscBool      iascii,isbinary,isdraw,issocket;
766 
767   PetscFunctionBegin;
768 
769   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
770   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
771   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
772   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
773 
774   if (iascii || issocket || isdraw) {
775     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
776   } else if (isbinary) {
777     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
778   } else {
779     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "MatGetInfo_MPIDense"
786 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
787 {
788   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
789   Mat            mdn = mat->A;
790   PetscErrorCode ierr;
791   PetscReal      isend[5],irecv[5];
792 
793   PetscFunctionBegin;
794   info->block_size     = 1.0;
795   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
796   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
797   isend[3] = info->memory;  isend[4] = info->mallocs;
798   if (flag == MAT_LOCAL) {
799     info->nz_used      = isend[0];
800     info->nz_allocated = isend[1];
801     info->nz_unneeded  = isend[2];
802     info->memory       = isend[3];
803     info->mallocs      = isend[4];
804   } else if (flag == MAT_GLOBAL_MAX) {
805     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
806     info->nz_used      = irecv[0];
807     info->nz_allocated = irecv[1];
808     info->nz_unneeded  = irecv[2];
809     info->memory       = irecv[3];
810     info->mallocs      = irecv[4];
811   } else if (flag == MAT_GLOBAL_SUM) {
812     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
813     info->nz_used      = irecv[0];
814     info->nz_allocated = irecv[1];
815     info->nz_unneeded  = irecv[2];
816     info->memory       = irecv[3];
817     info->mallocs      = irecv[4];
818   }
819   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
820   info->fill_ratio_needed = 0;
821   info->factor_mallocs    = 0;
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatSetOption_MPIDense"
827 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
828 {
829   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833   switch (op) {
834   case MAT_NEW_NONZERO_LOCATIONS:
835   case MAT_NEW_NONZERO_LOCATION_ERR:
836   case MAT_NEW_NONZERO_ALLOCATION_ERR:
837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
838     break;
839   case MAT_ROW_ORIENTED:
840     a->roworiented = flg;
841     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
842     break;
843   case MAT_NEW_DIAGONALS:
844   case MAT_USE_HASH_TABLE:
845     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
846     break;
847   case MAT_IGNORE_OFF_PROC_ENTRIES:
848     a->donotstash = flg;
849     break;
850   case MAT_SYMMETRIC:
851   case MAT_STRUCTURALLY_SYMMETRIC:
852   case MAT_HERMITIAN:
853   case MAT_SYMMETRY_ETERNAL:
854   case MAT_IGNORE_LOWER_TRIANGULAR:
855     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
856     break;
857   default:
858     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatDiagonalScale_MPIDense"
866 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
867 {
868   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
869   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
870   PetscScalar    *l,*r,x,*v;
871   PetscErrorCode ierr;
872   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
873 
874   PetscFunctionBegin;
875   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
876   if (ll) {
877     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
878     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
879     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
880     for (i=0; i<m; i++) {
881       x = l[i];
882       v = mat->v + i;
883       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
884     }
885     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
886     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
887   }
888   if (rr) {
889     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
890     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
891     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
894     for (i=0; i<n; i++) {
895       x = r[i];
896       v = mat->v + i*m;
897       for (j=0; j<m; j++) { (*v++) *= x;}
898     }
899     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
900     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
901   }
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatNorm_MPIDense"
907 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
908 {
909   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
910   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
911   PetscErrorCode ierr;
912   PetscInt       i,j;
913   PetscReal      sum = 0.0;
914   PetscScalar    *v = mat->v;
915 
916   PetscFunctionBegin;
917   if (mdn->size == 1) {
918     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
919   } else {
920     if (type == NORM_FROBENIUS) {
921       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
922 #if defined(PETSC_USE_COMPLEX)
923         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
924 #else
925         sum += (*v)*(*v); v++;
926 #endif
927       }
928       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       *nrm = sqrt(*nrm);
930       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
931     } else if (type == NORM_1) {
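      /* 1-norm = max over columns of the global column sums of absolute values: tmp[] holds
         the local column sums, the Allreduce forms the global sums in tmp2[], and the largest
         entry of tmp2[] is the norm */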
932       PetscReal *tmp,*tmp2;
933       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
934       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
935       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
936       *nrm = 0.0;
937       v = mat->v;
938       for (j=0; j<mdn->A->cmap->n; j++) {
939         for (i=0; i<mdn->A->rmap->n; i++) {
940           tmp[j] += PetscAbsScalar(*v);  v++;
941         }
942       }
943       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
944       for (j=0; j<A->cmap->N; j++) {
945         if (tmp2[j] > *nrm) *nrm = tmp2[j];
946       }
947       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
948       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
949     } else if (type == NORM_INFINITY) { /* max row norm */
950       PetscReal ntemp;
951       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
952       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
953     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
954   }
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNCT__
959 #define __FUNCT__ "MatTranspose_MPIDense"
960 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
961 {
962   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
963   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
964   Mat            B;
965   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
966   PetscErrorCode ierr;
967   PetscInt       j,i;
968   PetscScalar    *v;
969 
970   PetscFunctionBegin;
971   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
972   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
973     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
974     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
975     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
976     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
977   } else {
978     B = *matout;
979   }
980 
981   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
982   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
983   for (i=0; i<m; i++) rwork[i] = rstart + i;
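  /* column j of the local block holds A(rstart:rstart+m-1,j); it supplies this process's
     contribution to global row j of B, with rwork[] providing the global column indices
     rstart .. rstart+m-1 */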
984   for (j=0; j<n; j++) {
985     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
986     v   += m;
987   }
988   ierr = PetscFree(rwork);CHKERRQ(ierr);
989   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
992     *matout = B;
993   } else {
994     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
995   }
996   PetscFunctionReturn(0);
997 }
998 
999 
1000 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1001 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1005 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1006 {
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #if defined(PETSC_HAVE_PLAPACK)
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1018 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1019 {
1020   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1021   PetscErrorCode ierr;
1022   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1023   PetscScalar    *array;
1024   PetscReal      one = 1.0;
1025 
1026   PetscFunctionBegin;
1027   /* Copy A into F->lu->A */
1028   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1029   ierr = PLA_API_begin();CHKERRQ(ierr);
1030   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1031   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1032   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1033   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1034   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1035   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1036   ierr = PLA_API_end();CHKERRQ(ierr);
1037   lu->rstart = rstart;
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1043 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1044 {
1045   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1046   PetscErrorCode ierr;
1047   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1048   PetscScalar    *array;
1049   PetscReal      one = 1.0;
1050 
1051   PetscFunctionBegin;
1052   /* Copy the PLAPACK matrix lu->A (stored in F) back into A */
1053   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1054   ierr = PLA_API_begin();CHKERRQ(ierr);
1055   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1056   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1057   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1058   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1059   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1060   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1061   ierr = PLA_API_end();CHKERRQ(ierr);
1062   lu->rstart = rstart;
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1068 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1069 {
1070   PetscErrorCode ierr;
1071   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1072   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1073   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1074   PLA_Obj        alpha = NULL,beta = NULL;
1075 
1076   PetscFunctionBegin;
1077   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1078   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1079 
1080   /*
1081   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1082   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1083   */
1084 
1085   /* do the multiply in PLA  */
1086   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1087   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1088   CHKMEMQ;
1089 
1090   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1091   CHKMEMQ;
1092   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1093   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1094 
1095   /*
1096   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1097   */
1098   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1104 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1105 {
1106   PetscErrorCode ierr;
1107   PetscInt       m=A->rmap->n,n=B->cmap->n;
1108   Mat            Cmat;
1109 
1110   PetscFunctionBegin;
1111   if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1112   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1113   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1114   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1115   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1116   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1117   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1118 
1119   *C = Cmat;
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1125 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1126 {
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   if (scall == MAT_INITIAL_MATRIX){
1131     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1132   }
1133   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatSolve_MPIDense"
1139 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1140 {
1141   MPI_Comm       comm = ((PetscObject)A)->comm;
1142   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1143   PetscErrorCode ierr;
1144   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1145   PetscScalar    *array;
1146   PetscReal      one = 1.0;
1147   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1148   PLA_Obj        v_pla = NULL;
1149   PetscScalar    *loc_buf;
1150   Vec            loc_x;
1151 
1152   PetscFunctionBegin;
1153   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1154   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1155 
1156   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1157   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1158   PLA_Obj_set_to_zero(v_pla);
1159 
1160   /* Copy b into the PLAPACK vector v_pla */
1161   PLA_API_begin();
1162   PLA_Obj_API_open(v_pla);
1163   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1164   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1165   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1166   PLA_Obj_API_close(v_pla);
1167   PLA_API_end();
1168 
1169   if (A->factortype == MAT_FACTOR_LU){
1170     /* Apply the permutations to the right hand sides */
1171     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1172 
1173     /* Solve L y = b, overwriting b with y */
1174     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1175 
1176     /* Solve U x = y (=b), overwriting b with x */
1177     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1178   } else { /* MAT_FACTOR_CHOLESKY */
1179     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1180     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1181                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1182   }
1183 
1184   /* Copy PLAPACK x into Petsc vector x  */
1185   PLA_Obj_local_length(v_pla, &loc_m);
1186   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1187   PLA_Obj_local_stride(v_pla, &loc_stride);
1188   /*
1189     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1190   */
1191   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1192   if (!lu->pla_solved){
1193 
1194     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1195     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1196 
1197     /* Create the index sets and scatter context (lu->ctx) for the VecScatter */
1198     PLA_Obj_local_length(v_pla, &loc_m);
1199     PLA_Obj_local_stride(v_pla, &loc_stride);
1200     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1201 
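    /* Build the index sets that scatter the PLAPACK solution back to the PETSc layout of x:
       idx_petsc[] maps each locally stored entry to its global (PETSc) index, assuming
       PLAPACK's block-cyclic layout with block size nb over the processes ordered by
       (row rank, column rank); idx_pla[] addresses the strided local PLAPACK buffer. */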
1202     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1203     for (i=0; i<loc_m; i+=lu->nb){
1204       j = 0;
1205       while (j < lu->nb && i+j < loc_m){
1206         idx_petsc[i+j] = rstart + j; j++;
1207       }
1208       rstart += size*lu->nb;
1209     }
1210 
1211     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1212 
1213     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
1214     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
1215     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1216     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1217   }
1218   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1219   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220 
1221   /* Free data */
1222   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1223   PLA_Obj_free(&v_pla);
1224 
1225   lu->pla_solved = PETSC_TRUE;
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1231 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1232 {
1233   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1234   PetscErrorCode ierr;
1235   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1236   PetscInt       info_pla=0;
1237   PetscScalar    *array,one = 1.0;
1238 
1239   PetscFunctionBegin;
1240   if (lu->mstruct == SAME_NONZERO_PATTERN){
1241     PLA_Obj_free(&lu->A);
1242     PLA_Obj_free (&lu->pivots);
1243   }
1244   /* Create PLAPACK matrix object */
1245   lu->A = NULL; lu->pivots = NULL;
1246   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1247   PLA_Obj_set_to_zero(lu->A);
1248   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1249 
1250   /* Copy A into lu->A */
1251   PLA_API_begin();
1252   PLA_Obj_API_open(lu->A);
1253   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1254   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1255   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1256   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1257   PLA_Obj_API_close(lu->A);
1258   PLA_API_end();
1259 
1260   /* Factor P A = L U, overwriting the lower triangular portion of A with L and the upper with U */
1261   info_pla = PLA_LU(lu->A,lu->pivots);
1262   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1263 
1264   lu->rstart         = rstart;
1265   lu->mstruct        = SAME_NONZERO_PATTERN;
1266   F->ops->solve      = MatSolve_MPIDense;
1267   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1273 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1274 {
1275   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1276   PetscErrorCode ierr;
1277   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1278   PetscInt       info_pla=0;
1279   PetscScalar    *array,one = 1.0;
1280 
1281   PetscFunctionBegin;
1282   if (lu->mstruct == SAME_NONZERO_PATTERN){
1283     PLA_Obj_free(&lu->A);
1284   }
1285   /* Create PLAPACK matrix object */
1286   lu->A      = NULL;
1287   lu->pivots = NULL;
1288   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1289 
1290   /* Copy A into lu->A */
1291   PLA_API_begin();
1292   PLA_Obj_API_open(lu->A);
1293   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1294   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1295   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1296   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1297   PLA_Obj_API_close(lu->A);
1298   PLA_API_end();
1299 
1300   /* Cholesky factor A = L L^H, overwriting the lower triangle of lu->A with L */
1301   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1302   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1303 
1304   lu->rstart         = rstart;
1305   lu->mstruct        = SAME_NONZERO_PATTERN;
1306   F->ops->solve      = MatSolve_MPIDense;
1307   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 /* Note the Petsc perm permutation is ignored */
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1314 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1315 {
1316   PetscErrorCode ierr;
1317   PetscBool      issymmetric,set;
1318 
1319   PetscFunctionBegin;
1320   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1321   if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1322   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* Note the Petsc r and c permutations are ignored */
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1329 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1330 {
1331   PetscErrorCode ierr;
1332   PetscInt       M = A->rmap->N;
1333   Mat_Plapack    *lu;
1334 
1335   PetscFunctionBegin;
1336   lu = (Mat_Plapack*)F->spptr;
1337   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1338   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 EXTERN_C_BEGIN
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1345 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1346 {
1347   PetscFunctionBegin;
1348   *type = MATSOLVERPLAPACK;
1349   PetscFunctionReturn(0);
1350 }
1351 EXTERN_C_END
1352 
1353 EXTERN_C_BEGIN
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1356 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1357 {
1358   PetscErrorCode ierr;
1359   Mat_Plapack    *lu;
1360   PetscMPIInt    size;
1361   PetscInt       M=A->rmap->N;
1362 
1363   PetscFunctionBegin;
1364   /* Create the factorization matrix */
1365   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1366   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1367   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1368   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1369   (*F)->spptr = (void*)lu;
1370 
1371   /* Set default Plapack parameters */
1372   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1373   lu->nb = M/size;
1374   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
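  /* for example (hypothetical sizes) M = 10 and size = 4 give nb = 3, so each process owns at
     most one contiguous block of nb rows and the block-cyclic distribution degenerates to a
     plain block distribution */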
1375 
1376   /* Set runtime options */
1377   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1378     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1379   PetscOptionsEnd();
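  /* the distribution block size can also be selected at run time, e.g. (hypothetical executable)
     ./ex1 -mat_plapack_nb 64 */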
1380 
1381   /* Create object distribution template */
1382   lu->templ = NULL;
1383   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1384 
1385   /* Set the datatype */
1386 #if defined(PETSC_USE_COMPLEX)
1387   lu->datatype = MPI_DOUBLE_COMPLEX;
1388 #else
1389   lu->datatype = MPI_DOUBLE;
1390 #endif
1391 
1392   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1393 
1394 
1395   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1396   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1397 
1398   if (ftype == MAT_FACTOR_LU) {
1399     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1400   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1401     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1402   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1403   (*F)->factortype = ftype;
1404   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 EXTERN_C_END
1408 #endif
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1412 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1413 {
1414 #if defined(PETSC_HAVE_PLAPACK)
1415   PetscErrorCode ierr;
1416 #endif
1417 
1418   PetscFunctionBegin;
1419 #if defined(PETSC_HAVE_PLAPACK)
1420   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1421 #else
1422   SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1423 #endif
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatAXPY_MPIDense"
1429 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1430 {
1431   PetscErrorCode ierr;
1432   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatConjugate_MPIDense"
1441 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat)
1442 {
1443   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1444   PetscErrorCode ierr;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatRealPart_MPIDense"
1453 PetscErrorCode MatRealPart_MPIDense(Mat A)
1454 {
1455   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1465 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1466 {
1467   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1468   PetscErrorCode ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 /* -------------------------------------------------------------------*/
1476 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1477        MatGetRow_MPIDense,
1478        MatRestoreRow_MPIDense,
1479        MatMult_MPIDense,
1480 /* 4*/ MatMultAdd_MPIDense,
1481        MatMultTranspose_MPIDense,
1482        MatMultTransposeAdd_MPIDense,
1483        0,
1484        0,
1485        0,
1486 /*10*/ 0,
1487        0,
1488        0,
1489        0,
1490        MatTranspose_MPIDense,
1491 /*15*/ MatGetInfo_MPIDense,
1492        MatEqual_MPIDense,
1493        MatGetDiagonal_MPIDense,
1494        MatDiagonalScale_MPIDense,
1495        MatNorm_MPIDense,
1496 /*20*/ MatAssemblyBegin_MPIDense,
1497        MatAssemblyEnd_MPIDense,
1498        MatSetOption_MPIDense,
1499        MatZeroEntries_MPIDense,
1500 /*24*/ MatZeroRows_MPIDense,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*29*/ MatSetUpPreallocation_MPIDense,
1506        0,
1507        0,
1508        MatGetArray_MPIDense,
1509        MatRestoreArray_MPIDense,
1510 /*34*/ MatDuplicate_MPIDense,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*39*/ MatAXPY_MPIDense,
1516        MatGetSubMatrices_MPIDense,
1517        0,
1518        MatGetValues_MPIDense,
1519        0,
1520 /*44*/ 0,
1521        MatScale_MPIDense,
1522        0,
1523        0,
1524        0,
1525 /*49*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*54*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*59*/ MatGetSubMatrix_MPIDense,
1536        MatDestroy_MPIDense,
1537        MatView_MPIDense,
1538        0,
1539        0,
1540 /*64*/ 0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*69*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*74*/ 0,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*79*/ 0,
1556        0,
1557        0,
1558        0,
1559 /*83*/ MatLoad_MPIDense,
1560        0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*89*/
1566 #if defined(PETSC_HAVE_PLAPACK)
1567        MatMatMult_MPIDense_MPIDense,
1568        MatMatMultSymbolic_MPIDense_MPIDense,
1569        MatMatMultNumeric_MPIDense_MPIDense,
1570 #else
1571        0,
1572        0,
1573        0,
1574 #endif
1575        0,
1576        0,
1577 /*94*/ 0,
1578        0,
1579        0,
1580        0,
1581        0,
1582 /*99*/ 0,
1583        0,
1584        0,
1585        MatConjugate_MPIDense,
1586        0,
1587 /*104*/0,
1588        MatRealPart_MPIDense,
1589        MatImaginaryPart_MPIDense,
1590        0,
1591        0,
1592 /*109*/0,
1593        0,
1594        0,
1595        0,
1596        0,
1597 /*114*/0,
1598        0,
1599        0,
1600        0,
1601        0,
1602 /*119*/0,
1603        0,
1604        0,
1605        0
1606 };
1607 
1608 EXTERN_C_BEGIN
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1611 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1612 {
1613   Mat_MPIDense   *a;
1614   PetscErrorCode ierr;
1615 
1616   PetscFunctionBegin;
1617   mat->preallocated = PETSC_TRUE;
1618   /* Note: for now, when data is provided, we assume the user has correctly
1619    allocated the local dense storage space; error checking should be added. */
1620 
1621   a    = (Mat_MPIDense*)mat->data;
1622   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1623   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1624   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1625   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1626   a->nvec = mat->cmap->n;
1627 
1628   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1629   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1630   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1631   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1632   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 EXTERN_C_END
1636 
1637 /*MC
1638    MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1639 
1640   Run ./configure with the option --download-plapack to build PETSc with PLAPACK support.
1641 
1642 
1643   Options Database Keys:
1644 + -mat_plapack_nprows <n> - number of rows in processor partition
1645 . -mat_plapack_npcols <n> - number of columns in processor partition
1646 . -mat_plapack_nb <n> - block size of template vector
1647 . -mat_plapack_nb_alg <n> - algorithmic block size
1648 - -mat_plapack_ckerror <n> - error checking flag
1649 
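   Example usage (a sketch, assuming pc is the PC obtained from KSPGetPC() on an assembled MATMPIDENSE system):
.vb
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = PCFactorSetSolverPackage(pc,MATSOLVERPLAPACK);CHKERRQ(ierr);
.ve
   or, equivalently, run with -ksp_type preonly -pc_type lu -pc_factor_mat_solver_package plapack.
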
1650    Level: intermediate
1651 
1652 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1653 
1654 M*/
1655 
1656 EXTERN_C_BEGIN
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatCreate_MPIDense"
1659 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1660 {
1661   Mat_MPIDense   *a;
1662   PetscErrorCode ierr;
1663 
1664   PetscFunctionBegin;
1665   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1666   mat->data         = (void*)a;
1667   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1668   mat->mapping      = 0;
1669 
1670   mat->insertmode = NOT_SET_VALUES;
1671   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1672   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1673 
1674   /* build cache for off array entries formed */
1675   a->donotstash = PETSC_FALSE;
1676   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1677 
1678   /* stuff used for matrix vector multiply */
1679   a->lvec        = 0;
1680   a->Mvctx       = 0;
1681   a->roworiented = PETSC_TRUE;
1682 
1683   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1684                                      "MatGetDiagonalBlock_MPIDense",
1685                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1686   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1687                                      "MatMPIDenseSetPreallocation_MPIDense",
1688                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1689   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1690                                      "MatMatMult_MPIAIJ_MPIDense",
1691                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1692   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1693                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1694                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1696                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1697                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1699                                      "MatGetFactor_mpidense_petsc",
1700                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1701 #if defined(PETSC_HAVE_PLAPACK)
1702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1703                                      "MatGetFactor_mpidense_plapack",
1704                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1705   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1706 #endif
1707   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1708 
1709   PetscFunctionReturn(0);
1710 }
1711 EXTERN_C_END
1712 
1713 /*MC
1714    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1715 
1716    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1717    and MATMPIDENSE otherwise.
1718 
1719    Options Database Keys:
1720 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1721 
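   Example (a sketch; the communicator and sizes are illustrative and error checking is omitted):
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetType(A,MATDENSE);    /* becomes MATSEQDENSE on one process, MATMPIDENSE otherwise */
     MatSetFromOptions(A);
     MatSeqDenseSetPreallocation(A,PETSC_NULL);   /* the call matching the actual type takes effect */
     MatMPIDenseSetPreallocation(A,PETSC_NULL);
.ve
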
1722   Level: beginner
1723 
1724 
1725 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1726 M*/
1727 
1728 EXTERN_C_BEGIN
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatCreate_Dense"
1731 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1732 {
1733   PetscErrorCode ierr;
1734   PetscMPIInt    size;
1735 
1736   PetscFunctionBegin;
1737   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1738   if (size == 1) {
1739     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1740   } else {
1741     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1742   }
1743   PetscFunctionReturn(0);
1744 }
1745 EXTERN_C_END
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1749 /*@C
1750    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1751 
1752    Not collective
1753 
1754    Input Parameters:
1755 +  A - the matrix
1756 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1757    to control all matrix memory allocation.
1758 
1759    Notes:
1760    The dense format is fully compatible with standard Fortran 77
1761    storage by columns.
1762 
1763    The data input variable is intended primarily for Fortran programmers
1764    who wish to allocate their own matrix memory space.  Most users should
1765    set data=PETSC_NULL.
1766 
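   Example usage (a sketch; A, comm and the sizes m, n, M, N are placeholders, and error checking is omitted):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,PETSC_NULL);   /* let PETSc allocate the local column-major storage */
     /* ... MatSetValues() ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
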
1767    Level: intermediate
1768 
1769 .keywords: matrix,dense, parallel
1770 
1771 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1772 @*/
1773 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1774 {
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatCreateMPIDense"
1784 /*@C
1785    MatCreateMPIDense - Creates a parallel matrix in dense format.
1786 
1787    Collective on MPI_Comm
1788 
1789    Input Parameters:
1790 +  comm - MPI communicator
1791 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1792 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1793 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1794 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1795 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1796    to control all matrix memory allocation.
1797 
1798    Output Parameter:
1799 .  A - the matrix
1800 
1801    Notes:
1802    The dense format is fully compatible with standard Fortran 77
1803    storage by columns.
1804 
1805    The data input variable is intended primarily for Fortran programmers
1806    who wish to allocate their own matrix memory space.  Most users should
1807    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1808 
1809    The user MUST specify either the local or global matrix dimensions
1810    (possibly both).
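
   Example usage (a sketch; the global sizes M and N are placeholders and error checking is omitted):
.vb
     Mat A;
     MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
     /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
     MatDestroy(A);
.ve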
1811 
1812    Level: intermediate
1813 
1814 .keywords: matrix,dense, parallel
1815 
1816 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1817 @*/
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1819 {
1820   PetscErrorCode ierr;
1821   PetscMPIInt    size;
1822 
1823   PetscFunctionBegin;
1824   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1825   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1826   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1827   if (size > 1) {
1828     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1829     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1830   } else {
1831     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1832     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1833   }
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatDuplicate_MPIDense"
1839 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1840 {
1841   Mat            mat;
1842   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1843   PetscErrorCode ierr;
1844 
1845   PetscFunctionBegin;
1846   *newmat       = 0;
1847   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1848   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1849   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1850   a                 = (Mat_MPIDense*)mat->data;
1851   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1852 
1853   mat->factortype   = A->factortype;
1854   mat->assembled    = PETSC_TRUE;
1855   mat->preallocated = PETSC_TRUE;
1856 
1857   a->size           = oldmat->size;
1858   a->rank           = oldmat->rank;
1859   mat->insertmode   = NOT_SET_VALUES;
1860   a->nvec           = oldmat->nvec;
1861   a->donotstash     = oldmat->donotstash;
1862 
1863   ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
1864   ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);
1865 
1866   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1867   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1868   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1869 
1870   *newmat = mat;
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1876 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1877 {
1878   PetscErrorCode ierr;
1879   PetscMPIInt    rank,size;
1880   PetscInt       *rowners,i,m,nz,j;
1881   PetscScalar    *array,*vals,*vals_ptr;
1882   MPI_Status     status;
1883 
1884   PetscFunctionBegin;
1885   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1887 
1888   /* determine ownership of all rows */
1889   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1890   else                     m = newmat->rmap->n;
1891   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1892   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1893   rowners[0] = 0;
1894   for (i=2; i<=size; i++) {
1895     rowners[i] += rowners[i-1];
1896   }
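  /* rowners[i] now holds the global index of the first row owned by process i (prefix sum of the local row counts) */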
1897 
1898   if (!sizesset) {
1899     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1900   }
1901   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1902   ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);
1903 
1904   if (!rank) {
1905     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1906 
1907     /* read in my part of the matrix numerical values  */
1908     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1909 
1910     /* insert into the matrix row by row; the local dense array is column-major (Fortran order), so the row-oriented file data cannot be read directly into it */
1911     vals_ptr = vals;
1912     for (i=0; i<m; i++) {
1913       for (j=0; j<N; j++) {
1914         array[i + j*m] = *vals_ptr++;
1915       }
1916     }
1917 
1918     /* read in other processors and ship out */
1919     for (i=1; i<size; i++) {
1920       nz   = (rowners[i+1] - rowners[i])*N;
1921       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1922       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1923     }
1924   } else {
1925     /* receive numeric values */
1926     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1927 
1928     /* receive the message of values */
1929     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);
1930 
1931     /* insert into the matrix row by row; see the note above about the column-major local array */
1932     vals_ptr = vals;
1933     for (i=0; i<m; i++) {
1934       for (j=0; j<N; j++) {
1935         array[i + j*m] = *vals_ptr++;
1936       }
1937     }
1938   }
1939   ierr = PetscFree(rowners);CHKERRQ(ierr);
1940   ierr = PetscFree(vals);CHKERRQ(ierr);
1941   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1942   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatLoad_MPIDense"
1948 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1949 {
1950   PetscScalar    *vals,*svals;
1951   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1952   MPI_Status     status;
1953   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1954   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1955   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1956   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1957   int            fd;
1958   PetscErrorCode ierr;
1959 
1960   PetscFunctionBegin;
1961   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1962   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1963   if (!rank) {
1964     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1965     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1966     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1967   }
1968   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1969 
1970   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1971   M = header[1]; N = header[2]; nz = header[3];
1972 
1973   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1974   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1975   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1976 
1977   /* If the global sizes are set, check that they are consistent with those given in the file */
1978   if (sizesset) {
1979     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1980   }
1981   if (sizesset && grows != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
1982   if (sizesset && gcols != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1983 
1984   /*
1985        Handle case where matrix is stored on disk as a dense matrix
1986   */
1987   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1988     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1989     PetscFunctionReturn(0);
1990   }
1991 
1992   /* determine ownership of all rows */
1993   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
1994   else                     m = PetscMPIIntCast(newmat->rmap->n);
1995   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1996   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1997   rowners[0] = 0;
1998   for (i=2; i<=size; i++) {
1999     rowners[i] += rowners[i-1];
2000   }
2001   rstart = rowners[rank];
2002   rend   = rowners[rank+1];
2003 
2004   /* distribute row lengths to all processors */
2005   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
2006   if (!rank) {
2007     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2008     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2009     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2010     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2011     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2012     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2013   } else {
2014     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2015   }
2016 
2017   if (!rank) {
2018     /* calculate the number of nonzeros on each processor */
2019     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2020     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<size; i++) {
2022       for (j=rowners[i]; j< rowners[i+1]; j++) {
2023         procsnz[i] += rowlengths[j];
2024       }
2025     }
2026     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2027 
2028     /* determine max buffer needed and allocate it */
2029     maxnz = 0;
2030     for (i=0; i<size; i++) {
2031       maxnz = PetscMax(maxnz,procsnz[i]);
2032     }
2033     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2034 
2035     /* read in my part of the matrix column indices  */
2036     nz = procsnz[0];
2037     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2038     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2039 
2040     /* read in everyone else's part and ship it off */
2041     for (i=1; i<size; i++) {
2042       nz   = procsnz[i];
2043       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2044       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2045     }
2046     ierr = PetscFree(cols);CHKERRQ(ierr);
2047   } else {
2048     /* determine buffer space needed for message */
2049     nz = 0;
2050     for (i=0; i<m; i++) {
2051       nz += ourlens[i];
2052     }
2053     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2054 
2055     /* receive message of column indices*/
2056     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2057     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2058     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2059   }
2060 
2061   /* loop over local rows, determining number of off diagonal entries */
2062   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2063   jj = 0;
2064   for (i=0; i<m; i++) {
2065     for (j=0; j<ourlens[i]; j++) {
2066       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2067       jj++;
2068     }
2069   }
2070 
2071   /* create our matrix */
2072   for (i=0; i<m; i++) {
2073     ourlens[i] -= offlens[i];
2074   }
2075 
2076   if (!sizesset) {
2077     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2078   }
2079   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2080   for (i=0; i<m; i++) {
2081     ourlens[i] += offlens[i];
2082   }
2083 
2084   if (!rank) {
2085     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2086 
2087     /* read in my part of the matrix numerical values  */
2088     nz = procsnz[0];
2089     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2090 
2091     /* insert into matrix */
2092     jj      = rstart;
2093     smycols = mycols;
2094     svals   = vals;
2095     for (i=0; i<m; i++) {
2096       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2097       smycols += ourlens[i];
2098       svals   += ourlens[i];
2099       jj++;
2100     }
2101 
2102     /* read in other processors and ship out */
2103     for (i=1; i<size; i++) {
2104       nz   = procsnz[i];
2105       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2106       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2107     }
2108     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2109   } else {
2110     /* receive numeric values */
2111     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2112 
2113     /* receive the message of values */
2114     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2115     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2116     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2117 
2118     /* insert into matrix */
2119     jj      = rstart;
2120     smycols = mycols;
2121     svals   = vals;
2122     for (i=0; i<m; i++) {
2123       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2124       smycols += ourlens[i];
2125       svals   += ourlens[i];
2126       jj++;
2127     }
2128   }
2129   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2130   ierr = PetscFree(vals);CHKERRQ(ierr);
2131   ierr = PetscFree(mycols);CHKERRQ(ierr);
2132   ierr = PetscFree(rowners);CHKERRQ(ierr);
2133 
2134   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatEqual_MPIDense"
2141 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2142 {
2143   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2144   Mat            a,b;
2145   PetscBool      flg;
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   a = matA->A;
2150   b = matB->A;
2151   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2152   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 #if defined(PETSC_HAVE_PLAPACK)
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2160 /*@C
2161   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2162   Level: developer
2163 
2164 .keywords: Petsc, destroy, package, PLAPACK
2165 .seealso: PetscFinalize()
2166 @*/
2167 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2168 {
2169   PetscErrorCode ierr;
2170 
2171   PetscFunctionBegin;
2172   ierr = PLA_Finalize();CHKERRQ(ierr);
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 #undef __FUNCT__
2177 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2178 /*@C
2179   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2180   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2181 
2182   Input Parameter:
2183 .   comm - the communicator the matrix lives on
2184 
2185   Level: developer
2186 
2187    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2188   same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2189   PLAPACK is initialized (that is, the first matrices are created) on subcommunicators of MPI_COMM_WORLD, those subcommunicators
2190   cannot overlap.
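
  The default 1 x <size> processor mesh can be overridden at runtime, for example (a sketch for a run on four processes)
.vb
     -mat_plapack_nprows 2 -mat_plapack_npcols 2
.ve
  where the product of the two values is expected to match the communicator size.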
2191 
2192 .keywords: Petsc, initialize, package, PLAPACK
2193 .seealso: PetscSysInitializePackage(), PetscInitialize()
2194 @*/
2195 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2196 {
2197   PetscMPIInt    size;
2198   PetscErrorCode ierr;
2199 
2200   PetscFunctionBegin;
2201   if (!PLA_Initialized(PETSC_NULL)) {
2202 
2203     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2204     Plapack_nprows = 1;
2205     Plapack_npcols = size;
2206 
2207     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2208       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2209       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2210 #if defined(PETSC_USE_DEBUG)
2211       Plapack_ierror = 3;
2212 #else
2213       Plapack_ierror = 0;
2214 #endif
2215       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2216       if (Plapack_ierror){
2217 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2218       } else {
2219 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2220       }
2221 
2222       Plapack_nb_alg = 0;
2223       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2224       if (Plapack_nb_alg) {
2225         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2226       }
2227     PetscOptionsEnd();
2228 
2229     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2230     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2231     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #endif
2237