xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 7d0a6c19129e7069c8a40e210b34ed62989173db)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 
7 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
8 #if defined(PETSC_HAVE_PLAPACK)
9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
10 static MPI_Comm Plapack_comm_2d;
11 EXTERN_C_BEGIN
12 #include "PLA.h"
13 EXTERN_C_END
14 
15 typedef struct {
16   PLA_Obj        A,pivots;
17   PLA_Template   templ;
18   MPI_Datatype   datatype;
19   PetscInt       nb,rstart;
20   VecScatter     ctx;
21   IS             is_pla,is_petsc;
22   PetscBool      pla_solved;
23   MatStructure   mstruct;
24 } Mat_Plapack;
25 #endif
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatDenseGetLocalMatrix"
29 /*@
30 
31       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
32               matrix that stores the locally owned rows; for a MATSEQDENSE matrix, returns the matrix itself.
33 
34     Input Parameter:
35 .      A - the Seq or MPI dense matrix
36 
37     Output Parameter:
38 .      B - the inner matrix
39 
40     Level: intermediate
41 
42 @*/
43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscBool      flg;
48 
49   PetscFunctionBegin;
50   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
51   if (flg) {
52     *B = mat->A;
53   } else {
54     *B = A;
55   }
56   PetscFunctionReturn(0);
57 }
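
/*
   A minimal usage sketch (the matrix A and the surrounding error-handling
   style are illustrative only):

      Mat          Aloc;
      PetscScalar  *v;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
      ...  v is the column-major array of the local rows of A  ...
      ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
*/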
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatGetRow_MPIDense"
61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
62 {
63   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
66 
67   PetscFunctionBegin;
68   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
69   lrow = row - rstart;
70   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatRestoreRow_MPIDense"
76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
82   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
83   PetscFunctionReturn(0);
84 }
85 
86 EXTERN_C_BEGIN
87 #undef __FUNCT__
88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
89 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *B)
90 {
91   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
92   PetscErrorCode ierr;
93   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
94   PetscScalar    *array;
95   MPI_Comm       comm;
96 
97   PetscFunctionBegin;
98   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
99 
100   /* The reuse aspect is not implemented efficiently */
101   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
102 
103   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
104   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
105   ierr = MatCreate(comm,B);CHKERRQ(ierr);
106   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
107   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
108   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
109   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
110   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
111   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112 
113   *iscopy = PETSC_TRUE;
114   PetscFunctionReturn(0);
115 }
116 EXTERN_C_END
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPIDense"
120 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
123   PetscErrorCode ierr;
124   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
125   PetscBool      roworiented = A->roworiented;
126 
127   PetscFunctionBegin;
128   if (v) PetscValidScalarPointer(v,6);
129   for (i=0; i<m; i++) {
130     if (idxm[i] < 0) continue;
131     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
132     if (idxm[i] >= rstart && idxm[i] < rend) {
133       row = idxm[i] - rstart;
134       if (roworiented) {
135         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
136       } else {
137         for (j=0; j<n; j++) {
138           if (idxn[j] < 0) continue;
139           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
141         }
142       }
143     } else {
144       if (!A->donotstash) {
145         if (roworiented) {
146           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
147         } else {
148           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
149         }
150       }
151     }
152   }
153   PetscFunctionReturn(0);
154 }
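
/*
   Entries destined for rows owned by another process are not sent here; they
   are accumulated in mat->stash and exchanged during assembly.  A minimal
   sketch (row, col and val are illustrative):

      PetscInt    row = 0,col = 0;
      PetscScalar val = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/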
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatGetValues_MPIDense"
158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
159 {
160   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
163 
164   PetscFunctionBegin;
165   for (i=0; i<m; i++) {
166     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
167     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
168     if (idxm[i] >= rstart && idxm[i] < rend) {
169       row = idxm[i] - rstart;
170       for (j=0; j<n; j++) {
171         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
172         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
173         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
174       }
175     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatGetArray_MPIDense"
182 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
183 {
184   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
185   PetscErrorCode ierr;
186 
187   PetscFunctionBegin;
188   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
194 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
195 {
196   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
197   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
198   PetscErrorCode ierr;
199   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
200   const PetscInt *irow,*icol;
201   PetscScalar    *av,*bv,*v = lmat->v;
202   Mat            newmat;
203   IS             iscol_local;
204 
205   PetscFunctionBegin;
206   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
207   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
208   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
209   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
210   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
211   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
212 
213   /* No parallel redistribution is currently supported! Should really check each index set
214      to confirm that it is OK. ... Currently the submatrix must have the same row partitioning
215      as the original matrix! */
216 
217   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
218   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
219 
220   /* Check submatrix call */
221   if (scall == MAT_REUSE_MATRIX) {
222     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
223     /* Really need to check the row and column sizes! */
224     newmat = *B;
225   } else {
226     /* Create and fill new matrix */
227     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
228     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
229     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
230     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
231   }
232 
233   /* Now extract the data pointers and do the copy, column at a time */
234   newmatd = (Mat_MPIDense*)newmat->data;
235   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
236 
237   for (i=0; i<Ncols; i++) {
238     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
239     for (j=0; j<nrows; j++) {
240       *bv++ = av[irow[j] - rstart];
241     }
242   }
243 
244   /* Assemble the matrices so that the correct flags are set */
245   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
246   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247 
248   /* Free work space */
249   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
250   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
      ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
251   *B = newmat;
252   PetscFunctionReturn(0);
253 }
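
/*
   Because no redistribution is performed, isrow must select only rows this
   process already owns.  A sketch that satisfies the restriction (all names
   are illustrative):

      IS       isrow,iscol;
      Mat      B;
      PetscInt rstart,rend,cstart,cend;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(((PetscObject)A)->comm,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/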
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatRestoreArray_MPIDense"
257 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
258 {
259   PetscFunctionBegin;
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
265 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
266 {
267   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
268   MPI_Comm       comm = ((PetscObject)mat)->comm;
269   PetscErrorCode ierr;
270   PetscInt       nstash,reallocs;
271   InsertMode     addv;
272 
273   PetscFunctionBegin;
274   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
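      /* (INSERT_VALUES and ADD_VALUES have distinct bit patterns, so the bitwise
         OR across ranks equals ADD_VALUES|INSERT_VALUES exactly when the ranks
         disagree, which triggers the error below on every process) */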
275   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
276   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
277   mat->insertmode = addv; /* in case this processor had no cache */
278 
279   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
280   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
281   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
287 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
288 {
289   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
290   PetscErrorCode  ierr;
291   PetscInt        i,*row,*col,flg,j,rstart,ncols;
292   PetscMPIInt     n;
293   PetscScalar     *val;
294   InsertMode      addv=mat->insertmode;
295 
296   PetscFunctionBegin;
297   /*  wait on receives */
298   while (1) {
299     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
300     if (!flg) break;
301 
302     for (i=0; i<n;) {
303       /* Now identify the consecutive values belonging to the same row */
304       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
305       if (j < n) ncols = j-i;
306       else       ncols = n-i;
307       /* Now assemble all these values with a single function call */
308       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
309       i = j;
310     }
311   }
312   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
313 
314   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
316 
317   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
318     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "MatZeroEntries_MPIDense"
325 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
326 {
327   PetscErrorCode ierr;
328   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
329 
330   PetscFunctionBegin;
331   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 /* the code does not do the diagonal entries correctly unless the
336    matrix is square and the column and row ownerships are identical.
337    This is a BUG. The only way to fix it seems to be to access
338    mdn->A and mdn->B directly and not through the MatZeroRows()
339    routine.
340 */
341 #undef __FUNCT__
342 #define __FUNCT__ "MatZeroRows_MPIDense"
343 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
344 {
345   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
346   PetscErrorCode    ierr;
347   PetscInt          i,*owners = A->rmap->range;
348   PetscInt          *nprocs,j,idx,nsends;
349   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
350   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
351   PetscInt          *lens,*lrows,*values;
352   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
353   MPI_Comm          comm = ((PetscObject)A)->comm;
354   MPI_Request       *send_waits,*recv_waits;
355   MPI_Status        recv_status,*send_status;
356   PetscBool         found;
357   const PetscScalar *xx;
358   PetscScalar       *bb;
359 
360   PetscFunctionBegin;
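      /* The exchange below follows the usual PETSc rendezvous pattern: count the
         rows destined for each process, learn the number and maximum size of
         incoming messages with PetscMaxSum(), post the MPI_Irecv()s, ship the
         global row indices with MPI_Isend(), and finally convert the received
         indices to local numbering before zeroing the local block. */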
361   /*  first count number of contributors to each processor */
362   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
363   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
364   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
365   for (i=0; i<N; i++) {
366     idx = rows[i];
367     found = PETSC_FALSE;
368     for (j=0; j<size; j++) {
369       if (idx >= owners[j] && idx < owners[j+1]) {
370         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
371       }
372     }
373     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
374   }
375   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
376 
377   /* inform other processors of number of messages and max length*/
378   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
379 
380   /* post receives:   */
381   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
382   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
383   for (i=0; i<nrecvs; i++) {
384     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
385   }
386 
387   /* do sends:
388       1) starts[i] gives the starting index in svalues for stuff going to
389          the ith processor
390   */
391   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
392   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
393   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
394   starts[0]  = 0;
395   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
396   for (i=0; i<N; i++) {
397     svalues[starts[owner[i]]++] = rows[i];
398   }
399 
400   starts[0] = 0;
401   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
402   count = 0;
403   for (i=0; i<size; i++) {
404     if (nprocs[2*i+1]) {
405       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
406     }
407   }
408   ierr = PetscFree(starts);CHKERRQ(ierr);
409 
410   base = owners[rank];
411 
412   /*  wait on receives */
413   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
414   count  = nrecvs;
415   slen   = 0;
416   while (count) {
417     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
418     /* unpack receives into our local space */
419     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
420     source[imdex]  = recv_status.MPI_SOURCE;
421     lens[imdex]    = n;
422     slen += n;
423     count--;
424   }
425   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
426 
427   /* move the data into the send scatter */
428   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
429   count = 0;
430   for (i=0; i<nrecvs; i++) {
431     values = rvalues + i*nmax;
432     for (j=0; j<lens[i]; j++) {
433       lrows[count++] = values[j] - base;
434     }
435   }
436   ierr = PetscFree(rvalues);CHKERRQ(ierr);
437   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
438   ierr = PetscFree(owner);CHKERRQ(ierr);
439   ierr = PetscFree(nprocs);CHKERRQ(ierr);
440 
441   /* fix right hand side if needed */
442   if (x && b) {
443     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
444     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
445     for (i=0; i<slen; i++) {
446       bb[lrows[i]] = diag*xx[lrows[i]];
447     }
448     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
449     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
450   }
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
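
/*
   y = A x is computed as: scatter the distributed x into the sequential work
   vector lvec (which holds all N entries needed by the local row block), then
   apply the local block with a sequential multiply.  The transpose variants
   below run the same pattern in reverse: a local transpose multiply first,
   then a scatter-add of the partial results.
*/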
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap->n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
543   radd = A->rmap->rstart*m;
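      /* aloc->v is column-major with leading dimension m, so the i-th local
         diagonal entry (global row and column rstart+i) sits at offset
         rstart*m + i*m + i */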
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 #if defined(PETSC_HAVE_PLAPACK)
558   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
559 #endif
560 
561   PetscFunctionBegin;
562 
563 #if defined(PETSC_USE_LOG)
564   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
565 #endif
566   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
567   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
568   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
569   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
570 #if defined(PETSC_HAVE_PLAPACK)
571   if (lu) {
572     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
573     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
574     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
575 
576     if (lu->is_pla) {
577       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
578       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
579       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
580     }
581   }
582 #endif
583 
584   ierr = PetscFree(mdn);CHKERRQ(ierr);
585   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
586   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
591   PetscFunctionReturn(0);
592 }
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "MatView_MPIDense_Binary"
596 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
597 {
598   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
599   PetscErrorCode    ierr;
600   PetscViewerFormat format;
601   int               fd;
602   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
603   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
604   PetscScalar       *work,*v,*vv;
605   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
606   MPI_Status        status;
607 
608   PetscFunctionBegin;
609   if (mdn->size == 1) {
610     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
611   } else {
612     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
613     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
614     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
615 
616     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
617     if (format == PETSC_VIEWER_NATIVE) {
618 
619       if (!rank) {
620         /* store the matrix as a dense matrix */
621         header[0] = MAT_FILE_CLASSID;
622         header[1] = mat->rmap->N;
623         header[2] = N;
624         header[3] = MATRIX_BINARY_FORMAT_DENSE;
625         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
626 
627         /* get largest work array needed for transposing array */
628         mmax = mat->rmap->n;
629         for (i=1; i<size; i++) {
630           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
631         }
632         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
633 
634         /* write out local array, by rows */
635         m    = mat->rmap->n;
636         v    = a->v;
637         for (j=0; j<N; j++) {
638           for (i=0; i<m; i++) {
639             work[j + i*N] = *v++;
640           }
641         }
642         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
643         /* get largest work array to receive messages from other processes, excludes process zero */
644         mmax = 0;
645         for (i=1; i<size; i++) {
646           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
647         }
648         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
649         for (k=1; k<size; k++) {
650           v    = vv;
651           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
652           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
653 
654           for (j=0; j<N; j++) {
655             for (i=0; i<m; i++) {
656               work[j + i*N] = *v++;
657             }
658           }
659           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
660         }
661         ierr = PetscFree(work);CHKERRQ(ierr);
662         ierr = PetscFree(vv);CHKERRQ(ierr);
663       } else {
664         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
665       }
666     } else {
667       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
668     }
669   }
670   PetscFunctionReturn(0);
671 }
672 
673 #undef __FUNCT__
674 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
675 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
676 {
677   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
678   PetscErrorCode        ierr;
679   PetscMPIInt           size = mdn->size,rank = mdn->rank;
680   const PetscViewerType vtype;
681   PetscBool             iascii,isdraw;
682   PetscViewer           sviewer;
683   PetscViewerFormat     format;
684 #if defined(PETSC_HAVE_PLAPACK)
685   Mat_Plapack           *lu;
686 #endif
687 
688   PetscFunctionBegin;
689   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
690   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
691   if (iascii) {
692     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
693     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
694     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
695       MatInfo info;
696       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
698                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
699       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
700 #if defined(PETSC_HAVE_PLAPACK)
701       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
702       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
703       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
704       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
705       if (mat->factortype){
706         lu=(Mat_Plapack*)(mat->spptr);
707         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
708       }
709 #else
710       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
711 #endif
712       PetscFunctionReturn(0);
713     } else if (format == PETSC_VIEWER_ASCII_INFO) {
714       PetscFunctionReturn(0);
715     }
716   } else if (isdraw) {
717     PetscDraw  draw;
718     PetscBool  isnull;
719 
720     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
721     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
722     if (isnull) PetscFunctionReturn(0);
723   }
724 
725   if (size == 1) {
726     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
727   } else {
728     /* assemble the entire matrix onto first processor. */
729     Mat         A;
730     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
731     PetscInt    *cols;
732     PetscScalar *vals;
733 
734     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
735     if (!rank) {
736       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
737     } else {
738       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
739     }
740     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
741     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
742     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
743     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
744 
745     /* Copy the matrix ... This isn't the most efficient means,
746        but it's quick for now */
747     A->insertmode = INSERT_VALUES;
748     row = mat->rmap->rstart; m = mdn->A->rmap->n;
749     for (i=0; i<m; i++) {
750       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
751       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
752       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
753       row++;
754     }
755 
756     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
757     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
758     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
759     if (!rank) {
760       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
761       /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
762       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
763       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
764     }
765     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
766     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
767     ierr = MatDestroy(A);CHKERRQ(ierr);
768   }
769   PetscFunctionReturn(0);
770 }
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatView_MPIDense"
774 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
775 {
776   PetscErrorCode ierr;
777   PetscBool      iascii,isbinary,isdraw,issocket;
778 
779   PetscFunctionBegin;
780 
781   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
782   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
783   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
785 
786   if (iascii || issocket || isdraw) {
787     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
788   } else if (isbinary) {
789     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
790   } else {
791     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
792   }
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatGetInfo_MPIDense"
798 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
799 {
800   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
801   Mat            mdn = mat->A;
802   PetscErrorCode ierr;
803   PetscReal      isend[5],irecv[5];
804 
805   PetscFunctionBegin;
806   info->block_size     = 1.0;
807   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
808   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
809   isend[3] = info->memory;  isend[4] = info->mallocs;
810   if (flag == MAT_LOCAL) {
811     info->nz_used      = isend[0];
812     info->nz_allocated = isend[1];
813     info->nz_unneeded  = isend[2];
814     info->memory       = isend[3];
815     info->mallocs      = isend[4];
816   } else if (flag == MAT_GLOBAL_MAX) {
817     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
818     info->nz_used      = irecv[0];
819     info->nz_allocated = irecv[1];
820     info->nz_unneeded  = irecv[2];
821     info->memory       = irecv[3];
822     info->mallocs      = irecv[4];
823   } else if (flag == MAT_GLOBAL_SUM) {
824     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
825     info->nz_used      = irecv[0];
826     info->nz_allocated = irecv[1];
827     info->nz_unneeded  = irecv[2];
828     info->memory       = irecv[3];
829     info->mallocs      = irecv[4];
830   }
831   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
832   info->fill_ratio_needed = 0;
833   info->factor_mallocs    = 0;
834   PetscFunctionReturn(0);
835 }
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatSetOption_MPIDense"
839 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
840 {
841   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
842   PetscErrorCode ierr;
843 
844   PetscFunctionBegin;
845   switch (op) {
846   case MAT_NEW_NONZERO_LOCATIONS:
847   case MAT_NEW_NONZERO_LOCATION_ERR:
848   case MAT_NEW_NONZERO_ALLOCATION_ERR:
849     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
850     break;
851   case MAT_ROW_ORIENTED:
852     a->roworiented = flg;
853     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
854     break;
855   case MAT_NEW_DIAGONALS:
856   case MAT_USE_HASH_TABLE:
857     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
858     break;
859   case MAT_IGNORE_OFF_PROC_ENTRIES:
860     a->donotstash = flg;
861     break;
862   case MAT_SYMMETRIC:
863   case MAT_STRUCTURALLY_SYMMETRIC:
864   case MAT_HERMITIAN:
865   case MAT_SYMMETRY_ETERNAL:
866   case MAT_IGNORE_LOWER_TRIANGULAR:
867     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
868     break;
869   default:
870     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "MatDiagonalScale_MPIDense"
878 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
879 {
880   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
881   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
882   PetscScalar    *l,*r,x,*v;
883   PetscErrorCode ierr;
884   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
885 
886   PetscFunctionBegin;
887   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
888   if (ll) {
889     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
890     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
891     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
892     for (i=0; i<m; i++) {
893       x = l[i];
894       v = mat->v + i;
895       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
896     }
897     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
898     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
899   }
900   if (rr) {
901     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
902     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
903     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
906     for (i=0; i<n; i++) {
907       x = r[i];
908       v = mat->v + i*m;
909       for (j=0; j<m; j++) { (*v++) *= x;}
910     }
911     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
912     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
913   }
914   PetscFunctionReturn(0);
915 }
916 
917 #undef __FUNCT__
918 #define __FUNCT__ "MatNorm_MPIDense"
919 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
920 {
921   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
922   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
923   PetscErrorCode ierr;
924   PetscInt       i,j;
925   PetscReal      sum = 0.0;
926   PetscScalar    *v = mat->v;
927 
928   PetscFunctionBegin;
929   if (mdn->size == 1) {
930     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
931   } else {
932     if (type == NORM_FROBENIUS) {
933       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
934 #if defined(PETSC_USE_COMPLEX)
935         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
936 #else
937         sum += (*v)*(*v); v++;
938 #endif
939       }
940       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
941       *nrm = sqrt(*nrm);
942       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
943     } else if (type == NORM_1) {
944       PetscReal *tmp,*tmp2;
945       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
946       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
947       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
948       *nrm = 0.0;
949       v = mat->v;
950       for (j=0; j<mdn->A->cmap->n; j++) {
951         for (i=0; i<mdn->A->rmap->n; i++) {
952           tmp[j] += PetscAbsScalar(*v);  v++;
953         }
954       }
955       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
956       for (j=0; j<A->cmap->N; j++) {
957         if (tmp2[j] > *nrm) *nrm = tmp2[j];
958       }
959       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
960       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
961     } else if (type == NORM_INFINITY) { /* max row norm */
962       PetscReal ntemp;
963       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
964       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
965     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
966   }
967   PetscFunctionReturn(0);
968 }
969 
970 #undef __FUNCT__
971 #define __FUNCT__ "MatTranspose_MPIDense"
972 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
973 {
974   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
975   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
976   Mat            B;
977   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
978   PetscErrorCode ierr;
979   PetscInt       j,i;
980   PetscScalar    *v;
981 
982   PetscFunctionBegin;
983   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
984   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
985     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
986     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
987     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
988     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
989   } else {
990     B = *matout;
991   }
992 
993   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
994   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
995   for (i=0; i<m; i++) rwork[i] = rstart + i;
996   for (j=0; j<n; j++) {
997     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
998     v   += m;
999   }
1000   ierr = PetscFree(rwork);CHKERRQ(ierr);
1001   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1002   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1004     *matout = B;
1005   } else {
1006     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1007   }
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 
1012 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1013 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1017 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1018 {
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 #if defined(PETSC_HAVE_PLAPACK)
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1030 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1031 {
1032   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1033   PetscErrorCode ierr;
1034   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1035   PetscScalar    *array;
1036   PetscReal      one = 1.0;
1037 
1038   PetscFunctionBegin;
1039   /* Copy A into F->lu->A */
1040   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1041   ierr = PLA_API_begin();CHKERRQ(ierr);
1042   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1043   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1044   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1045   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1046   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1047   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1048   ierr = PLA_API_end();CHKERRQ(ierr);
1049   lu->rstart = rstart;
1050   PetscFunctionReturn(0);
1051 }
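
/*
   This routine and its counterpart below use PLAPACK's API mode: the target
   object is opened with PLA_Obj_API_open(), and PLA_API_axpy_matrix_to_global()
   (or PLA_API_axpy_global_to_matrix()) adds one times the local column-major
   buffer into (or out of) the distributed object at row offset rstart.
*/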
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1055 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1056 {
1057   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1058   PetscErrorCode ierr;
1059   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1060   PetscScalar    *array;
1061   PetscReal      one = 1.0;
1062 
1063   PetscFunctionBegin;
1064   /* Copy F into A->lu->A */
1065   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1066   ierr = PLA_API_begin();CHKERRQ(ierr);
1067   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1068   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1069   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1070   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1071   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1072   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1073   ierr = PLA_API_end();CHKERRQ(ierr);
1074   lu->rstart = rstart;
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1080 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1081 {
1082   PetscErrorCode ierr;
1083   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1084   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1085   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1086   PLA_Obj        alpha = NULL,beta = NULL;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1090   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1091 
1092   /*
1093   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1094   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1095   */
1096 
1097   /* do the multiply in PLA  */
1098   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1099   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1100   CHKMEMQ;
1101 
1102   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1103   CHKMEMQ;
1104   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1105   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1106 
1107   /*
1108   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1109   */
1110   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1116 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1117 {
1118   PetscErrorCode ierr;
1119   PetscInt       m=A->rmap->n,n=B->cmap->n;
1120   Mat            Cmat;
1121 
1122   PetscFunctionBegin;
1123   if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1124   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1125   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1126   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1127   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1128   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1129   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1130 
1131   *C = Cmat;
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1137 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1138 {
1139   PetscErrorCode ierr;
1140 
1141   PetscFunctionBegin;
1142   if (scall == MAT_INITIAL_MATRIX){
1143     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1144   }
1145   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
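
/*
   A minimal calling sketch, currently short-circuited by the SETERRQ() in the
   symbolic routine above (A, B and C are illustrative):

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/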
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatSolve_MPIDense"
1151 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1152 {
1153   MPI_Comm       comm = ((PetscObject)A)->comm;
1154   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1155   PetscErrorCode ierr;
1156   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1157   PetscScalar    *array;
1158   PetscReal      one = 1.0;
1159   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1160   PLA_Obj        v_pla = NULL;
1161   PetscScalar    *loc_buf;
1162   Vec            loc_x;
1163 
1164   PetscFunctionBegin;
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1167 
1168   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1169   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1170   PLA_Obj_set_to_zero(v_pla);
1171 
1172   /* Copy b into rhs_pla */
1173   PLA_API_begin();
1174   PLA_Obj_API_open(v_pla);
1175   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1176   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1177   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1178   PLA_Obj_API_close(v_pla);
1179   PLA_API_end();
1180 
1181   if (A->factortype == MAT_FACTOR_LU){
1182     /* Apply the permutations to the right hand sides */
1183     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1184 
1185     /* Solve L y = b, overwriting b with y */
1186     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1187 
1188     /* Solve U x = y (=b), overwriting b with x */
1189     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1190   } else { /* MAT_FACTOR_CHOLESKY */
1191     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1192     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1193                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1194   }
1195 
1196   /* Copy PLAPACK x into Petsc vector x  */
1197   PLA_Obj_local_length(v_pla, &loc_m);
1198   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1199   PLA_Obj_local_stride(v_pla, &loc_stride);
1200   /*
1201     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1202   */
1203   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
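      /* On the first solve, build and cache (in lu->ctx) the scatter that maps
         PLAPACK's block-cyclic local ordering back to PETSc's contiguous
         ordering; subsequent solves reuse it. */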
1204   if (!lu->pla_solved){
1205 
1206     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1207     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1208 
1209     /* Create the index sets and scatter context for the VecScatter */
1210     PLA_Obj_local_length(v_pla, &loc_m);
1211     PLA_Obj_local_stride(v_pla, &loc_stride);
1212     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1213 
1214     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1215     for (i=0; i<loc_m; i+=lu->nb){
1216       j = 0;
1217       while (j < lu->nb && i+j < loc_m){
1218         idx_petsc[i+j] = rstart + j; j++;
1219       }
1220       rstart += size*lu->nb;
1221     }
1222 
1223     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1224 
1225     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
1226     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
1227     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1228     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1229   }
1230   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232 
1233   /* Free data */
1234   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1235   PLA_Obj_free(&v_pla);
1236 
1237   lu->pla_solved = PETSC_TRUE;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1243 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1244 {
1245   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1246   PetscErrorCode ierr;
1247   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1248   PetscInt       info_pla=0;
1249   PetscScalar    *array,one = 1.0;
1250 
1251   PetscFunctionBegin;
1252   if (lu->mstruct == SAME_NONZERO_PATTERN){
1253     PLA_Obj_free(&lu->A);
1254     PLA_Obj_free (&lu->pivots);
1255   }
1256   /* Create PLAPACK matrix object */
1257   lu->A = NULL; lu->pivots = NULL;
1258   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1259   PLA_Obj_set_to_zero(lu->A);
1260   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1261 
1262   /* Copy A into lu->A */
1263   PLA_API_begin();
1264   PLA_Obj_API_open(lu->A);
1265   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1266   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1267   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1268   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1269   PLA_Obj_API_close(lu->A);
1270   PLA_API_end();
1271 
1272   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1273   info_pla = PLA_LU(lu->A,lu->pivots);
1274   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1275 
1276   lu->rstart         = rstart;
1277   lu->mstruct        = SAME_NONZERO_PATTERN;
1278   F->ops->solve      = MatSolve_MPIDense;
1279   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1285 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1286 {
1287   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1288   PetscErrorCode ierr;
1289   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1290   PetscInt       info_pla=0;
1291   PetscScalar    *array,one = 1.0;
1292 
1293   PetscFunctionBegin;
1294   if (lu->mstruct == SAME_NONZERO_PATTERN){
1295     PLA_Obj_free(&lu->A);
1296   }
1297   /* Create PLAPACK matrix object */
1298   lu->A      = NULL;
1299   lu->pivots = NULL;
1300   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1301 
1302   /* Copy A into lu->A */
1303   PLA_API_begin();
1304   PLA_Obj_API_open(lu->A);
1305   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1306   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1307   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1308   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1309   PLA_Obj_API_close(lu->A);
1310   PLA_API_end();
1311 
1312   /* Factor A -> L L^T (Cholesky), overwriting the lower triangle of A with L */
1313   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1314   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1315 
1316   lu->rstart         = rstart;
1317   lu->mstruct        = SAME_NONZERO_PATTERN;
1318   F->ops->solve      = MatSolve_MPIDense;
1319   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 /* Note the Petsc perm permutation is ignored */
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1326 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1327 {
1328   PetscErrorCode ierr;
1329   PetscBool      issymmetric,set;
1330 
1331   PetscFunctionBegin;
1332   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1333   if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1334   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 /* Note the Petsc r and c permutations are ignored */
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1341 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1342 {
1343   PetscErrorCode ierr;
1344   PetscInt       M = A->rmap->N;
1345   Mat_Plapack    *lu;
1346 
1347   PetscFunctionBegin;
1348   lu = (Mat_Plapack*)F->spptr;
1349   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1350   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 EXTERN_C_BEGIN
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1357 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1358 {
1359   PetscFunctionBegin;
1360   *type = MATSOLVERPLAPACK;
1361   PetscFunctionReturn(0);
1362 }
1363 EXTERN_C_END
1364 
1365 EXTERN_C_BEGIN
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1368 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1369 {
1370   PetscErrorCode ierr;
1371   Mat_Plapack    *lu;
1372   PetscMPIInt    size;
1373   PetscInt       M=A->rmap->N;
1374 
1375   PetscFunctionBegin;
1376   /* Create the factorization matrix */
1377   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1378   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1379   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1380   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1381   (*F)->spptr = (void*)lu;
1382 
1383   /* Set default Plapack parameters */
1384   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1385   lu->nb = M/size;
1386   if (M - lu->nb*size) lu->nb++; /* round up so each process holds one contiguous block (no cyclic distribution) */
1387 
1388   /* Set runtime options */
1389   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1390     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1391   PetscOptionsEnd();
1392 
1393   /* Create object distribution template */
1394   lu->templ = NULL;
1395   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1396 
1397   /* Set the datatype */
1398 #if defined(PETSC_USE_COMPLEX)
1399   lu->datatype = MPI_DOUBLE_COMPLEX;
1400 #else
1401   lu->datatype = MPI_DOUBLE;
1402 #endif
1403 
1404   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1405 
1407   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1408   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1409 
1410   if (ftype == MAT_FACTOR_LU) {
1411     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1412   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1413     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1414   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1415   (*F)->factortype = ftype;
1416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 EXTERN_C_END
1420 #endif
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1424 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1425 {
1426 #if defined(PETSC_HAVE_PLAPACK)
1427   PetscErrorCode ierr;
1428 #endif
1429 
1430   PetscFunctionBegin;
1431 #if defined(PETSC_HAVE_PLAPACK)
1432   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1433 #else
1434   SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s requires the PLAPACK direct solver; configure PETSc with --download-plapack",((PetscObject)A)->type_name);
1435 #endif
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatAXPY_MPIDense"
1441 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1442 {
1443   PetscErrorCode ierr;
1444   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatConjugate_MPIDense"
1453 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1454 {
1455   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatRealPart_MPIDense"
1465 PetscErrorCode MatRealPart_MPIDense(Mat A)
1466 {
1467   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1468   PetscErrorCode ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1477 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1478 {
1479   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1480   PetscErrorCode ierr;
1481 
1482   PetscFunctionBegin;
1483   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 /* -------------------------------------------------------------------*/
1488 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1489        MatGetRow_MPIDense,
1490        MatRestoreRow_MPIDense,
1491        MatMult_MPIDense,
1492 /* 4*/ MatMultAdd_MPIDense,
1493        MatMultTranspose_MPIDense,
1494        MatMultTransposeAdd_MPIDense,
1495        0,
1496        0,
1497        0,
1498 /*10*/ 0,
1499        0,
1500        0,
1501        0,
1502        MatTranspose_MPIDense,
1503 /*15*/ MatGetInfo_MPIDense,
1504        MatEqual_MPIDense,
1505        MatGetDiagonal_MPIDense,
1506        MatDiagonalScale_MPIDense,
1507        MatNorm_MPIDense,
1508 /*20*/ MatAssemblyBegin_MPIDense,
1509        MatAssemblyEnd_MPIDense,
1510        MatSetOption_MPIDense,
1511        MatZeroEntries_MPIDense,
1512 /*24*/ MatZeroRows_MPIDense,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*29*/ MatSetUpPreallocation_MPIDense,
1518        0,
1519        0,
1520        MatGetArray_MPIDense,
1521        MatRestoreArray_MPIDense,
1522 /*34*/ MatDuplicate_MPIDense,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*39*/ MatAXPY_MPIDense,
1528        MatGetSubMatrices_MPIDense,
1529        0,
1530        MatGetValues_MPIDense,
1531        0,
1532 /*44*/ 0,
1533        MatScale_MPIDense,
1534        0,
1535        0,
1536        0,
1537 /*49*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*54*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*59*/ MatGetSubMatrix_MPIDense,
1548        MatDestroy_MPIDense,
1549        MatView_MPIDense,
1550        0,
1551        0,
1552 /*64*/ 0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*69*/ 0,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*74*/ 0,
1563        0,
1564        0,
1565        0,
1566        0,
1567 /*79*/ 0,
1568        0,
1569        0,
1570        0,
1571 /*83*/ MatLoad_MPIDense,
1572        0,
1573        0,
1574        0,
1575        0,
1576        0,
1577 /*89*/
1578 #if defined(PETSC_HAVE_PLAPACK)
1579        MatMatMult_MPIDense_MPIDense,
1580        MatMatMultSymbolic_MPIDense_MPIDense,
1581        MatMatMultNumeric_MPIDense_MPIDense,
1582 #else
1583        0,
1584        0,
1585        0,
1586 #endif
1587        0,
1588        0,
1589 /*94*/ 0,
1590        0,
1591        0,
1592        0,
1593        0,
1594 /*99*/ 0,
1595        0,
1596        0,
1597        MatConjugate_MPIDense,
1598        0,
1599 /*104*/0,
1600        MatRealPart_MPIDense,
1601        MatImaginaryPart_MPIDense,
1602        0,
1603        0,
1604 /*109*/0,
1605        0,
1606        0,
1607        0,
1608        0,
1609 /*114*/0,
1610        0,
1611        0,
1612        0,
1613        0,
1614 /*119*/0,
1615        0,
1616        0,
1617        0
1618 };
1619 
1620 EXTERN_C_BEGIN
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1623 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1624 {
1625   Mat_MPIDense   *a;
1626   PetscErrorCode ierr;
1627 
1628   PetscFunctionBegin;
1629   mat->preallocated = PETSC_TRUE;
1630   /* Note: For now, when data is specified, we assume the user has correctly
1631    allocated the local dense storage space.  We should add error checking. */
1632 
1633   a    = (Mat_MPIDense*)mat->data;
1634   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1635   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1636   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1637   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1638   a->nvec = mat->cmap->n;
1639 
1640   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1641   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1642   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1643   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1644   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1645   PetscFunctionReturn(0);
1646 }
1647 EXTERN_C_END
1648 
1649 /*MC
1650    MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1651 
1652   Run ./configure with the option --download-plapack to enable this solver.
1653 
1655   Options Database Keys:
1656 + -mat_plapack_nprows <n> - number of rows in processor partition
1657 . -mat_plapack_npcols <n> - number of columns in processor partition
1658 . -mat_plapack_nb <n> - block size of template vector
1659 . -mat_plapack_nb_alg <n> - algorithmic block size
1660 - -mat_plapack_ckerror <n> - error checking flag
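
  Example usage (a sketch; the executable name and option values are illustrative only):
.vb
     mpiexec -n 4 ./myapp -ksp_type preonly -pc_type lu -pc_factor_mat_solver_package plapack -mat_plapack_nb 16
.ve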
1661 
1662    Level: intermediate
1663 
1664 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1665 
1666 M*/
1667 
1668 EXTERN_C_BEGIN
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatCreate_MPIDense"
1671 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1672 {
1673   Mat_MPIDense   *a;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1678   mat->data         = (void*)a;
1679   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1680 
1681   mat->insertmode = NOT_SET_VALUES;
1682   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1683   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1684 
1685   /* build cache for off array entries formed */
1686   a->donotstash = PETSC_FALSE;
1687   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1688 
1689   /* stuff used for matrix vector multiply */
1690   a->lvec        = 0;
1691   a->Mvctx       = 0;
1692   a->roworiented = PETSC_TRUE;
1693 
1694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1695                                      "MatGetDiagonalBlock_MPIDense",
1696                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1698                                      "MatMPIDenseSetPreallocation_MPIDense",
1699                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1701                                      "MatMatMult_MPIAIJ_MPIDense",
1702                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1704                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1705                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1707                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1708                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1710                                      "MatGetFactor_mpidense_petsc",
1711                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1712 #if defined(PETSC_HAVE_PLAPACK)
1713   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1714                                      "MatGetFactor_mpidense_plapack",
1715                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1716   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1717 #endif
1718   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1719 
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
1723 
1724 /*MC
1725    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1726 
1727    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1728    and MATMPIDENSE otherwise.
1729 
1730    Options Database Keys:
1731 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
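
   For example, a minimal sketch (error checking omitted; comm, M, and N are assumed given) that
   lets the type be chosen at runtime:
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetFromOptions(A); /* -mat_type dense selects MATSEQDENSE or MATMPIDENSE based on the communicator size */
.ve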
1732 
1733   Level: beginner
1734 
1735 
1736 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1737 M*/
1738 
1739 EXTERN_C_BEGIN
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatCreate_Dense"
1742 PetscErrorCode  MatCreate_Dense(Mat A)
1743 {
1744   PetscErrorCode ierr;
1745   PetscMPIInt    size;
1746 
1747   PetscFunctionBegin;
1748   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1749   if (size == 1) {
1750     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1751   } else {
1752     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1753   }
1754   PetscFunctionReturn(0);
1755 }
1756 EXTERN_C_END
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1760 /*@C
1761    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1762 
1763    Not collective
1764 
1765    Input Parameters:
1766 +  A - the matrix
1767 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1768    to control all matrix memory allocation.
1769 
1770    Notes:
1771    The dense format is fully compatible with standard Fortran 77
1772    storage by columns.
1773 
1774    The data input variable is intended primarily for Fortran programmers
1775    who wish to allocate their own matrix memory space.  Most users should
1776    set data=PETSC_NULL.
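
   A minimal usage sketch (error checking omitted; comm, m, n, M, and N are assumed set elsewhere):
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,PETSC_NULL); /* PETSc allocates the storage */
.ve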
1777 
1778    Level: intermediate
1779 
1780 .keywords: matrix,dense, parallel
1781 
1782 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1783 @*/
1784 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1785 {
1786   PetscErrorCode ierr;
1787 
1788   PetscFunctionBegin;
1789   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatCreateMPIDense"
1795 /*@C
1796    MatCreateMPIDense - Creates a parallel matrix in dense format.
1797 
1798    Collective on MPI_Comm
1799 
1800    Input Parameters:
1801 +  comm - MPI communicator
1802 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1803 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1804 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1805 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1806 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1807    to control all matrix memory allocation.
1808 
1809    Output Parameter:
1810 .  A - the matrix
1811 
1812    Notes:
1813    The dense format is fully compatible with standard Fortran 77
1814    storage by columns.
1815 
1816    The data input variable is intended primarily for Fortran programmers
1817    who wish to allocate their own matrix memory space.  Most users should
1818    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1819 
1820    The user MUST specify either the local or global matrix dimensions
1821    (possibly both).
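
   For example, the following sketch creates a distributed dense matrix with PETSc-managed storage
   (error checking omitted; comm, M, and N are assumed given):
.vb
     Mat A;
     MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
.ve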
1822 
1823    Level: intermediate
1824 
1825 .keywords: matrix,dense, parallel
1826 
1827 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1828 @*/
1829 PetscErrorCode  MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1830 {
1831   PetscErrorCode ierr;
1832   PetscMPIInt    size;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1836   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1837   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1838   if (size > 1) {
1839     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1840     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1841   } else {
1842     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1843     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatDuplicate_MPIDense"
1850 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1851 {
1852   Mat            mat;
1853   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1854   PetscErrorCode ierr;
1855 
1856   PetscFunctionBegin;
1857   *newmat       = 0;
1858   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1859   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1860   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1861   a                 = (Mat_MPIDense*)mat->data;
1862   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1863 
1864   mat->factortype   = A->factortype;
1865   mat->assembled    = PETSC_TRUE;
1866   mat->preallocated = PETSC_TRUE;
1867 
1868   a->size           = oldmat->size;
1869   a->rank           = oldmat->rank;
1870   mat->insertmode   = NOT_SET_VALUES;
1871   a->nvec           = oldmat->nvec;
1872   a->donotstash     = oldmat->donotstash;
1873 
1874   ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
1875   ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);
1876 
1877   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1878   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1879   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1880 
1881   *newmat = mat;
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1887 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1888 {
1889   PetscErrorCode ierr;
1890   PetscMPIInt    rank,size;
1891   PetscInt       *rowners,i,m,nz,j;
1892   PetscScalar    *array,*vals,*vals_ptr;
1893   MPI_Status     status;
1894 
1895   PetscFunctionBegin;
1896   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1897   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1898 
1899   /* determine ownership of all rows */
1900   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1901   else m = newmat->rmap->n;
1902   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1903   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
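  /* rowners[1..size] now hold each process's local row count; the prefix sum below
     turns them into ownership offsets, so rowners[i] is the first global row owned by
     process i (the loop starts at i=2 since rowners[1] already equals rank 0's count) */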
1904   rowners[0] = 0;
1905   for (i=2; i<=size; i++) {
1906     rowners[i] += rowners[i-1];
1907   }
1908 
1909   if (!sizesset) {
1910     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1911   }
1912   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1913   ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);
1914 
1915   if (!rank) {
1916     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1917 
1918     /* read in my part of the matrix numerical values  */
1919     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1920 
1921     /* insert into the matrix row by row (the file stores values row by row but the dense array is column-major, so we cannot read directly into array) */
1922     vals_ptr = vals;
1923     for (i=0; i<m; i++) {
1924       for (j=0; j<N; j++) {
1925         array[i + j*m] = *vals_ptr++;
1926       }
1927     }
1928 
1929     /* read in the other processors' parts and ship them out */
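    /* note: vals was sized for this process's m rows, which assumes rank 0 owns at least
       as many rows as any other process (true for the default splitting above) */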
1930     for (i=1; i<size; i++) {
1931       nz   = (rowners[i+1] - rowners[i])*N;
1932       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1933       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1934     }
1935   } else {
1936     /* receive numeric values */
1937     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1938 
1939     /* receive the message of values */
1940     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);
1941 
1942     /* insert into the matrix row by row (the received buffer is row-major but the dense array is column-major, so we cannot copy directly into array) */
1943     vals_ptr = vals;
1944     for (i=0; i<m; i++) {
1945       for (j=0; j<N; j++) {
1946         array[i + j*m] = *vals_ptr++;
1947       }
1948     }
1949   }
1950   ierr = PetscFree(rowners);CHKERRQ(ierr);
1951   ierr = PetscFree(vals);CHKERRQ(ierr);
1952   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatLoad_MPIDense"
1959 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1960 {
1961   PetscScalar    *vals,*svals;
1962   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1963   MPI_Status     status;
1964   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1965   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1966   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1967   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1968   int            fd;
1969   PetscErrorCode ierr;
1970 
1971   PetscFunctionBegin;
1972   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1973   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1974   if (!rank) {
1975     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1976     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1977     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1978   }
1979   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1980 
1981   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1982   M = header[1]; N = header[2]; nz = header[3];
1983 
1984   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1985   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1986   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1987 
1988   /* If global sizes are set, check if they are consistent with that given in the file */
1989   if (sizesset) {
1990     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1991   }
1992   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1993   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1994 
1995   /*
1996        Handle case where matrix is stored on disk as a dense matrix
1997   */
1998   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1999     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
2000     PetscFunctionReturn(0);
2001   }
2002 
2003   /* determine ownership of all rows */
2004   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
2005   else m = PetscMPIIntCast(newmat->rmap->n);
2006   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2007   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2008   rowners[0] = 0;
2009   for (i=2; i<=size; i++) {
2010     rowners[i] += rowners[i-1];
2011   }
2012   rstart = rowners[rank];
2013   rend   = rowners[rank+1];
2014 
2015   /* distribute row lengths to all processors */
2016   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
2017   if (!rank) {
2018     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2019     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2020     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2021     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2022     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2023     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2024   } else {
2025     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2026   }
2027 
2028   if (!rank) {
2029     /* calculate the number of nonzeros on each processor */
2030     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2031     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2032     for (i=0; i<size; i++) {
2033       for (j=rowners[i]; j< rowners[i+1]; j++) {
2034         procsnz[i] += rowlengths[j];
2035       }
2036     }
2037     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2038 
2039     /* determine max buffer needed and allocate it */
2040     maxnz = 0;
2041     for (i=0; i<size; i++) {
2042       maxnz = PetscMax(maxnz,procsnz[i]);
2043     }
2044     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2045 
2046     /* read in my part of the matrix column indices  */
2047     nz = procsnz[0];
2048     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2049     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2050 
2051     /* read in everyone else's parts and ship them off */
2052     for (i=1; i<size; i++) {
2053       nz   = procsnz[i];
2054       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2055       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2056     }
2057     ierr = PetscFree(cols);CHKERRQ(ierr);
2058   } else {
2059     /* determine buffer space needed for message */
2060     nz = 0;
2061     for (i=0; i<m; i++) {
2062       nz += ourlens[i];
2063     }
2064     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2065 
2066     /* receive the message of column indices */
2067     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2068     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2069     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2070   }
2071 
2072   /* loop over local rows, determining the number of off-diagonal entries */
2073   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2074   jj = 0;
2075   for (i=0; i<m; i++) {
2076     for (j=0; j<ourlens[i]; j++) {
2077       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2078       jj++;
2079     }
2080   }
2081 
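  /* the off-diagonal split below appears to mirror the AIJ loader: it is undone right
     after the preallocation call, which for dense matrices takes no row lengths */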
2082   /* create our matrix */
2083   for (i=0; i<m; i++) {
2084     ourlens[i] -= offlens[i];
2085   }
2086 
2087   if (!sizesset) {
2088     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2089   }
2090   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2091   for (i=0; i<m; i++) {
2092     ourlens[i] += offlens[i];
2093   }
2094 
2095   if (!rank) {
2096     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2097 
2098     /* read in my part of the matrix numerical values  */
2099     nz = procsnz[0];
2100     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2101 
2102     /* insert into matrix */
2103     jj      = rstart;
2104     smycols = mycols;
2105     svals   = vals;
2106     for (i=0; i<m; i++) {
2107       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2108       smycols += ourlens[i];
2109       svals   += ourlens[i];
2110       jj++;
2111     }
2112 
2113     /* read in the other processors' parts and ship them out */
2114     for (i=1; i<size; i++) {
2115       nz   = procsnz[i];
2116       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2117       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2118     }
2119     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2120   } else {
2121     /* receive numeric values */
2122     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2123 
2124     /* receive the message of values */
2125     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2126     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2127     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2128 
2129     /* insert into matrix */
2130     jj      = rstart;
2131     smycols = mycols;
2132     svals   = vals;
2133     for (i=0; i<m; i++) {
2134       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2135       smycols += ourlens[i];
2136       svals   += ourlens[i];
2137       jj++;
2138     }
2139   }
2140   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2141   ierr = PetscFree(vals);CHKERRQ(ierr);
2142   ierr = PetscFree(mycols);CHKERRQ(ierr);
2143   ierr = PetscFree(rowners);CHKERRQ(ierr);
2144 
2145   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatEqual_MPIDense"
2152 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2153 {
2154   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2155   Mat            a,b;
2156   PetscBool      flg;
2157   PetscErrorCode ierr;
2158 
2159   PetscFunctionBegin;
2160   a = matA->A;
2161   b = matB->A;
2162   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2163   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #if defined(PETSC_HAVE_PLAPACK)
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2171 /*@C
2172   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2173   Level: developer
2174 
2175 .keywords: Petsc, destroy, package, PLAPACK
2176 .seealso: PetscFinalize()
2177 @*/
2178 PetscErrorCode  PetscPLAPACKFinalizePackage(void)
2179 {
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr = PLA_Finalize();CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2189 /*@C
2190   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2191   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2192 
2193   Input Parameter:
2194 .   comm - the communicator the matrix lives on
2195 
2196   Level: developer
2197 
2198    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2199   same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2200   PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, these
2201   subcommunicators cannot overlap.
2202 
2203 .keywords: Petsc, initialize, package, PLAPACK
2204 .seealso: PetscSysInitializePackage(), PetscInitialize()
2205 @*/
2206 PetscErrorCode  PetscPLAPACKInitializePackage(MPI_Comm comm)
2207 {
2208   PetscMPIInt    size;
2209   PetscErrorCode ierr;
2210 
2211   PetscFunctionBegin;
2212   if (!PLA_Initialized(PETSC_NULL)) {
2213 
2214     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2215     Plapack_nprows = 1;
2216     Plapack_npcols = size;
2217 
2218     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2219       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2220       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2221 #if defined(PETSC_USE_DEBUG)
2222       Plapack_ierror = 3;
2223 #else
2224       Plapack_ierror = 0;
2225 #endif
2226       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2227       if (Plapack_ierror) {
2228         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2229       } else {
2230         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2231       }
2232 
2233       Plapack_nb_alg = 0;
2234       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2235       if (Plapack_nb_alg) {
2236         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2237       }
2238     PetscOptionsEnd();
2239 
2240     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2241     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2242     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2243   }
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 #endif
2248