xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 09db5ad80398bac2facebf4ce22fd64f10e0f388)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
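/*
   Layout note (a descriptive summary of the implementation below, not a formal
   specification): each process stores its locally owned rows as a sequential
   dense (MATSEQDENSE) matrix mdn->A of size m x N, where m is the number of
   local rows and N is the global number of columns.  The work vector mdn->lvec
   (of length N) and the scatter mdn->Mvctx are used by MatMult() and related
   routines to gather the full input vector onto every process.
*/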
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #if defined(PETSC_HAVE_PLAPACK)
9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
10 static MPI_Comm Plapack_comm_2d;
11 EXTERN_C_BEGIN
12 #include <PLA.h>
13 EXTERN_C_END
14 
15 typedef struct {
16   PLA_Obj        A,pivots;
17   PLA_Template   templ;
18   MPI_Datatype   datatype;
19   PetscInt       nb,rstart;
20   VecScatter     ctx;
21   IS             is_pla,is_petsc;
22   PetscBool      pla_solved;
23   MatStructure   mstruct;
24 } Mat_Plapack;
25 #endif
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatDenseGetLocalMatrix"
29 /*@
30 
31       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix returns the sequential matrix that
32               stores the locally owned rows; for a MATSEQDENSE matrix it returns the matrix itself.
33 
34     Input Parameter:
35 .      A - the Seq or MPI dense matrix
36 
37     Output Parameter:
38 .      B - the inner matrix
39 
40     Level: intermediate
41 
42 @*/
43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscBool      flg;
48 
49   PetscFunctionBegin;
50   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
51   if (flg) {
52     *B = mat->A;
53   } else {
54     *B = A;
55   }
56   PetscFunctionReturn(0);
57 }
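/*
   Example usage (an illustrative sketch only; the returned B is a borrowed
   reference owned by A and must not be destroyed by the caller):

      Mat      B;
      PetscInt m,n;
      ierr = MatDenseGetLocalMatrix(A,&B);CHKERRQ(ierr);
      ierr = MatGetSize(B,&m,&n);CHKERRQ(ierr);
*/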
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatGetRow_MPIDense"
61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
62 {
63   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
66 
67   PetscFunctionBegin;
68   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
69   lrow = row - rstart;
70   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatRestoreRow_MPIDense"
76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
82   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
83   PetscFunctionReturn(0);
84 }
85 
86 EXTERN_C_BEGIN
87 #undef __FUNCT__
88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
89 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
90 {
91   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
92   PetscErrorCode ierr;
93   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
94   PetscScalar    *array;
95   MPI_Comm       comm;
96   Mat            B;
97 
98   PetscFunctionBegin;
99   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
100 
101   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
102   if (!B) {
103     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
104     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
105     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
106     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
107     ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
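    /* The local array is stored column major; assuming a leading dimension of m, global
       column rstart begins at array + m*rstart, so the m x m diagonal block B below
       shares this storage with mdn->A rather than copying it. */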
108     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
109     ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
110     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
111     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
113     *a = B;
114     ierr = MatDestroy(&B);CHKERRQ(ierr);
115   } else {
116     *a = B;
117   }
118   PetscFunctionReturn(0);
119 }
120 EXTERN_C_END
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatSetValues_MPIDense"
124 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
125 {
126   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
127   PetscErrorCode ierr;
128   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
129   PetscBool      roworiented = A->roworiented;
130 
131   PetscFunctionBegin;
132   if (v) PetscValidScalarPointer(v,6);
133   for (i=0; i<m; i++) {
134     if (idxm[i] < 0) continue;
135     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
136     if (idxm[i] >= rstart && idxm[i] < rend) {
137       row = idxm[i] - rstart;
138       if (roworiented) {
139         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
140       } else {
141         for (j=0; j<n; j++) {
142           if (idxn[j] < 0) continue;
143           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
144           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
145         }
146       }
147     } else {
148       if (!A->donotstash) {
149         if (roworiented) {
150           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
151         } else {
152           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
153         }
154       }
155     }
156   }
157   PetscFunctionReturn(0);
158 }
159 
160 #undef __FUNCT__
161 #define __FUNCT__ "MatGetValues_MPIDense"
162 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
163 {
164   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
165   PetscErrorCode ierr;
166   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
167 
168   PetscFunctionBegin;
169   for (i=0; i<m; i++) {
170     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
171     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
172     if (idxm[i] >= rstart && idxm[i] < rend) {
173       row = idxm[i] - rstart;
174       for (j=0; j<n; j++) {
175         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
176         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
177         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
178       }
179     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatGetArray_MPIDense"
186 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
193   PetscFunctionReturn(0);
194 }
195 
196 #undef __FUNCT__
197 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
198 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
199 {
200   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
201   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
202   PetscErrorCode ierr;
203   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
204   const PetscInt *irow,*icol;
205   PetscScalar    *av,*bv,*v = lmat->v;
206   Mat            newmat;
207   IS             iscol_local;
208 
209   PetscFunctionBegin;
210   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
211   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
212   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
213   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
214   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
215   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
216 
217   /* No parallel redistribution is currently supported; each index set should really be checked
218      to confirm that it is valid. Currently the submatrix must use the same row partitioning as
219      the original matrix. */
220 
221   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
222   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
223 
224   /* Check submatrix call */
225   if (scall == MAT_REUSE_MATRIX) {
226     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
227     /* Really need to test rows and column sizes! */
228     newmat = *B;
229   } else {
230     /* Create and fill new matrix */
231     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
232     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
233     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
234     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
235   }
236 
237   /* Now extract the data pointers and do the copy, column at a time */
238   newmatd = (Mat_MPIDense*)newmat->data;
239   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
240 
241   for (i=0; i<Ncols; i++) {
242     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
243     for (j=0; j<nrows; j++) {
244       *bv++ = av[irow[j] - rstart];
245     }
246   }
247 
248   /* Assemble the matrices so that the correct flags are set */
249   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
250   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251 
252   /* Free work space */
253   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
254   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
255   *B = newmat;
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatRestoreArray_MPIDense"
261 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
262 {
263   PetscFunctionBegin;
264   PetscFunctionReturn(0);
265 }
266 
267 #undef __FUNCT__
268 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
269 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
270 {
271   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
272   MPI_Comm       comm = ((PetscObject)mat)->comm;
273   PetscErrorCode ierr;
274   PetscInt       nstash,reallocs;
275   InsertMode     addv;
276 
277   PetscFunctionBegin;
278   /* make sure all processors are either in INSERTMODE or ADDMODE */
279   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
280   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
281   mat->insertmode = addv; /* in case this processor had no cache */
282 
283   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
284   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
285   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
286   PetscFunctionReturn(0);
287 }
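/*
   Note on the insert-mode check above (an explanatory aside): with the usual
   enum values INSERT_VALUES = 1 and ADD_VALUES = 2, the bitwise-OR reduction
   yields 3 = (ADD_VALUES|INSERT_VALUES) exactly when some processes inserted
   values while others added them, which is the mixed case that is rejected.
*/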
288 
289 #undef __FUNCT__
290 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
291 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
292 {
293   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
294   PetscErrorCode  ierr;
295   PetscInt        i,*row,*col,flg,j,rstart,ncols;
296   PetscMPIInt     n;
297   PetscScalar     *val;
298   InsertMode      addv=mat->insertmode;
299 
300   PetscFunctionBegin;
301   /*  wait on receives */
302   while (1) {
303     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
304     if (!flg) break;
305 
306     for (i=0; i<n;) {
307       /* Now identify the consecutive vals belonging to the same row */
308       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
309       if (j < n) ncols = j-i;
310       else       ncols = n-i;
311       /* Now assemble all these values with a single function call */
312       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
313       i = j;
314     }
315   }
316   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
317 
318   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
319   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
320 
321   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
322     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
323   }
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroEntries_MPIDense"
329 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
330 {
331   PetscErrorCode ierr;
332   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
333 
334   PetscFunctionBegin;
335   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
336   PetscFunctionReturn(0);
337 }
338 
339 /* The code does not handle the diagonal entries correctly unless the
340    matrix is square and the column and row ownerships are identical.
341    This is a BUG. The only way to fix it seems to be to access
342    mdn->A and mdn->B directly and not through the MatZeroRows()
343    routine.
344 */
345 #undef __FUNCT__
346 #define __FUNCT__ "MatZeroRows_MPIDense"
347 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
348 {
349   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
350   PetscErrorCode    ierr;
351   PetscInt          i,*owners = A->rmap->range;
352   PetscInt          *nprocs,j,idx,nsends;
353   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
354   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
355   PetscInt          *lens,*lrows,*values;
356   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
357   MPI_Comm          comm = ((PetscObject)A)->comm;
358   MPI_Request       *send_waits,*recv_waits;
359   MPI_Status        recv_status,*send_status;
360   PetscBool         found;
361   const PetscScalar *xx;
362   PetscScalar       *bb;
363 
364   PetscFunctionBegin;
365   /*  first count number of contributors to each processor */
366   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
367   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
368   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
369   for (i=0; i<N; i++) {
370     idx = rows[i];
371     found = PETSC_FALSE;
372     for (j=0; j<size; j++) {
373       if (idx >= owners[j] && idx < owners[j+1]) {
374         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
375       }
376     }
377     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
378   }
379   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
380 
381   /* inform other processors of number of messages and max length*/
382   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
383 
384   /* post receives:   */
385   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
386   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
387   for (i=0; i<nrecvs; i++) {
388     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
389   }
390 
391   /* do sends:
392       1) starts[i] gives the starting index in svalues for stuff going to
393          the ith processor
394   */
395   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
396   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
397   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
398   starts[0]  = 0;
399   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
400   for (i=0; i<N; i++) {
401     svalues[starts[owner[i]]++] = rows[i];
402   }
403 
404   starts[0] = 0;
405   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
406   count = 0;
407   for (i=0; i<size; i++) {
408     if (nprocs[2*i+1]) {
409       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
410     }
411   }
412   ierr = PetscFree(starts);CHKERRQ(ierr);
413 
414   base = owners[rank];
415 
416   /*  wait on receives */
417   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
418   count  = nrecvs;
419   slen   = 0;
420   while (count) {
421     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
422     /* unpack receives into our local space */
423     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
424     source[imdex]  = recv_status.MPI_SOURCE;
425     lens[imdex]    = n;
426     slen += n;
427     count--;
428   }
429   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
430 
431   /* move the data into the send scatter */
432   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
433   count = 0;
434   for (i=0; i<nrecvs; i++) {
435     values = rvalues + i*nmax;
436     for (j=0; j<lens[i]; j++) {
437       lrows[count++] = values[j] - base;
438     }
439   }
440   ierr = PetscFree(rvalues);CHKERRQ(ierr);
441   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
442   ierr = PetscFree(owner);CHKERRQ(ierr);
443   ierr = PetscFree(nprocs);CHKERRQ(ierr);
444 
445   /* fix right hand side if needed */
446   if (x && b) {
447     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
448     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
449     for (i=0; i<slen; i++) {
450       bb[lrows[i]] = diag*xx[lrows[i]];
451     }
452     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
453     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
454   }
455 
456   /* actually zap the local rows */
457   ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
458   ierr = PetscFree(lrows);CHKERRQ(ierr);
459 
460   /* wait on sends */
461   if (nsends) {
462     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
463     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
464     ierr = PetscFree(send_status);CHKERRQ(ierr);
465   }
466   ierr = PetscFree(send_waits);CHKERRQ(ierr);
467   ierr = PetscFree(svalues);CHKERRQ(ierr);
468 
469   PetscFunctionReturn(0);
470 }
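/*
   Summary of the communication pattern used above (descriptive only):
     1. Each process bins its requested global rows by owning process using owners[].
     2. PetscMaxSum() tells every process how many messages it will receive and the
        maximum message length, so the receives can be posted before the sends.
     3. Row indices are exchanged with MPI_Isend/MPI_Irecv and converted to local
        numbering (values[j] - base).
     4. The right-hand side is optionally fixed up and the local rows are zeroed by
        calling MatZeroRows() on the local sequential matrix.
*/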
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMult_MPIDense"
474 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
475 {
476   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
477   PetscErrorCode ierr;
478 
479   PetscFunctionBegin;
480   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
481   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
482   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "MatMultAdd_MPIDense"
488 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
489 {
490   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
491   PetscErrorCode ierr;
492 
493   PetscFunctionBegin;
494   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
495   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatMultTranspose_MPIDense"
502 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
503 {
504   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
505   PetscErrorCode ierr;
506   PetscScalar    zero = 0.0;
507 
508   PetscFunctionBegin;
509   ierr = VecSet(yy,zero);CHKERRQ(ierr);
510   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
511   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
512   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
513   PetscFunctionReturn(0);
514 }
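/*
   In MatMultTranspose_MPIDense() above (and MatMultTransposeAdd_MPIDense() below),
   each process computes a full-length partial result a->A^T times its local piece
   of x in a->lvec; the reverse scatter with ADD_VALUES then sums these partial
   contributions into the distributed output vector.
*/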
515 
516 #undef __FUNCT__
517 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
518 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
519 {
520   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
521   PetscErrorCode ierr;
522 
523   PetscFunctionBegin;
524   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
525   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
526   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
527   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatGetDiagonal_MPIDense"
533 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
534 {
535   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
536   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
537   PetscErrorCode ierr;
538   PetscInt       len,i,n,m = A->rmap->n,radd;
539   PetscScalar    *x,zero = 0.0;
540 
541   PetscFunctionBegin;
542   ierr = VecSet(v,zero);CHKERRQ(ierr);
543   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
544   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
545   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
546   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
547   radd = A->rmap->rstart*m;
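  /* Column-major indexing: assuming the local leading dimension equals m, the global
     diagonal entry (rstart+i,rstart+i) is stored at aloc->v[(rstart+i)*m + i],
     i.e. at offset radd + i*m + i. */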
548   for (i=0; i<len; i++) {
549     x[i] = aloc->v[radd + i*m + i];
550   }
551   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatDestroy_MPIDense"
557 PetscErrorCode MatDestroy_MPIDense(Mat mat)
558 {
559   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
560   PetscErrorCode ierr;
561 #if defined(PETSC_HAVE_PLAPACK)
562   Mat_Plapack   *lu=(Mat_Plapack*)mat->spptr;
563 #endif
564 
565   PetscFunctionBegin;
566 
567 #if defined(PETSC_USE_LOG)
568   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
569 #endif
570   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
571   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
572   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
573   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
574 #if defined(PETSC_HAVE_PLAPACK)
575   if (lu) {
576     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
577     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
578     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
579     ierr = ISDestroy(&lu->is_pla);CHKERRQ(ierr);
580     ierr = ISDestroy(&lu->is_petsc);CHKERRQ(ierr);
581     ierr = VecScatterDestroy(&lu->ctx);CHKERRQ(ierr);
582   }
583   ierr = PetscFree(mat->spptr);CHKERRQ(ierr);
584 #endif
585 
586   ierr = PetscFree(mat->data);CHKERRQ(ierr);
587   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
592   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatView_MPIDense_Binary"
598 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
599 {
600   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
601   PetscErrorCode    ierr;
602   PetscViewerFormat format;
603   int               fd;
604   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
605   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
606   PetscScalar       *work,*v,*vv;
607   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
608   MPI_Status        status;
609 
610   PetscFunctionBegin;
611   if (mdn->size == 1) {
612     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
613   } else {
614     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
615     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
616     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
617 
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_NATIVE) {
620 
621       if (!rank) {
622         /* store the matrix as a dense matrix */
623         header[0] = MAT_FILE_CLASSID;
624         header[1] = mat->rmap->N;
625         header[2] = N;
626         header[3] = MATRIX_BINARY_FORMAT_DENSE;
627         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
628 
629         /* get largest work array needed for transposing array */
630         mmax = mat->rmap->n;
631         for (i=1; i<size; i++) {
632           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
633         }
634         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
635 
636         /* write out local array, by rows */
637         m    = mat->rmap->n;
638         v    = a->v;
639         for (j=0; j<N; j++) {
640           for (i=0; i<m; i++) {
641             work[j + i*N] = *v++;
642           }
643         }
644         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
645         /* get largest work array to receive messages from other processes, excludes process zero */
646         mmax = 0;
647         for (i=1; i<size; i++) {
648           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
649         }
650         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
651         for(k = 1; k < size; k++) {
652           v    = vv;
653           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
654           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
655 
656           for(j = 0; j < N; j++) {
657             for(i = 0; i < m; i++) {
658               work[j + i*N] = *v++;
659             }
660           }
661           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
662         }
663         ierr = PetscFree(work);CHKERRQ(ierr);
664         ierr = PetscFree(vv);CHKERRQ(ierr);
665       } else {
666         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
667       }
668     } else {
669       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
670     }
671   }
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
677 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
678 {
679   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
680   PetscErrorCode        ierr;
681   PetscMPIInt           size = mdn->size,rank = mdn->rank;
682   const PetscViewerType vtype;
683   PetscBool             iascii,isdraw;
684   PetscViewer           sviewer;
685   PetscViewerFormat     format;
686 #if defined(PETSC_HAVE_PLAPACK)
687   Mat_Plapack           *lu;
688 #endif
689 
690   PetscFunctionBegin;
691   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
692   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
693   if (iascii) {
694     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
695     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
696     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
697       MatInfo info;
698       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
699       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
700       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
701       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
702       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
703 #if defined(PETSC_HAVE_PLAPACK)
704       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
705       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
706       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
707       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
708       if (mat->factortype){
709         lu=(Mat_Plapack*)(mat->spptr);
710         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
711       }
712 #else
713       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
714 #endif
715       PetscFunctionReturn(0);
716     } else if (format == PETSC_VIEWER_ASCII_INFO) {
717       PetscFunctionReturn(0);
718     }
719   } else if (isdraw) {
720     PetscDraw  draw;
721     PetscBool  isnull;
722 
723     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
724     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
725     if (isnull) PetscFunctionReturn(0);
726   }
727 
728   if (size == 1) {
729     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
730   } else {
731     /* assemble the entire matrix onto first processor. */
732     Mat         A;
733     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
734     PetscInt    *cols;
735     PetscScalar *vals;
736 
737     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
738     if (!rank) {
739       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
740     } else {
741       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
742     }
743     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
744     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
745     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
746     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
747 
748     /* Copy the matrix ... This isn't the most efficient means,
749        but it's quick for now */
750     A->insertmode = INSERT_VALUES;
751     row = mat->rmap->rstart; m = mdn->A->rmap->n;
752     for (i=0; i<m; i++) {
753       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
754       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
755       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
756       row++;
757     }
758 
759     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
760     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
761     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
762     if (!rank) {
763       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
764       /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
765       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
766       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
767     }
768     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
769     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
770     ierr = MatDestroy(&A);CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "MatView_MPIDense"
777 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
778 {
779   PetscErrorCode ierr;
780   PetscBool      iascii,isbinary,isdraw,issocket;
781 
782   PetscFunctionBegin;
783 
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
786   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
787   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
788 
789   if (iascii || issocket || isdraw) {
790     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
791   } else if (isbinary) {
792     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
793   } else {
794     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 #undef __FUNCT__
800 #define __FUNCT__ "MatGetInfo_MPIDense"
801 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
802 {
803   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
804   Mat            mdn = mat->A;
805   PetscErrorCode ierr;
806   PetscReal      isend[5],irecv[5];
807 
808   PetscFunctionBegin;
809   info->block_size     = 1.0;
810   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
811   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
812   isend[3] = info->memory;  isend[4] = info->mallocs;
813   if (flag == MAT_LOCAL) {
814     info->nz_used      = isend[0];
815     info->nz_allocated = isend[1];
816     info->nz_unneeded  = isend[2];
817     info->memory       = isend[3];
818     info->mallocs      = isend[4];
819   } else if (flag == MAT_GLOBAL_MAX) {
820     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
821     info->nz_used      = irecv[0];
822     info->nz_allocated = irecv[1];
823     info->nz_unneeded  = irecv[2];
824     info->memory       = irecv[3];
825     info->mallocs      = irecv[4];
826   } else if (flag == MAT_GLOBAL_SUM) {
827     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
828     info->nz_used      = irecv[0];
829     info->nz_allocated = irecv[1];
830     info->nz_unneeded  = irecv[2];
831     info->memory       = irecv[3];
832     info->mallocs      = irecv[4];
833   }
834   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
835   info->fill_ratio_needed = 0;
836   info->factor_mallocs    = 0;
837   PetscFunctionReturn(0);
838 }
839 
840 #undef __FUNCT__
841 #define __FUNCT__ "MatSetOption_MPIDense"
842 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
843 {
844   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
845   PetscErrorCode ierr;
846 
847   PetscFunctionBegin;
848   switch (op) {
849   case MAT_NEW_NONZERO_LOCATIONS:
850   case MAT_NEW_NONZERO_LOCATION_ERR:
851   case MAT_NEW_NONZERO_ALLOCATION_ERR:
852     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
853     break;
854   case MAT_ROW_ORIENTED:
855     a->roworiented = flg;
856     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
857     break;
858   case MAT_NEW_DIAGONALS:
859   case MAT_USE_HASH_TABLE:
860     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
861     break;
862   case MAT_IGNORE_OFF_PROC_ENTRIES:
863     a->donotstash = flg;
864     break;
865   case MAT_SYMMETRIC:
866   case MAT_STRUCTURALLY_SYMMETRIC:
867   case MAT_HERMITIAN:
868   case MAT_SYMMETRY_ETERNAL:
869   case MAT_IGNORE_LOWER_TRIANGULAR:
870     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
871     break;
872   default:
873     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatDiagonalScale_MPIDense"
881 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
882 {
883   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
884   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
885   PetscScalar    *l,*r,x,*v;
886   PetscErrorCode ierr;
887   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
888 
889   PetscFunctionBegin;
890   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
891   if (ll) {
892     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
893     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
894     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
895     for (i=0; i<m; i++) {
896       x = l[i];
897       v = mat->v + i;
898       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
899     }
900     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
901     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
902   }
903   if (rr) {
904     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
905     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
906     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
909     for (i=0; i<n; i++) {
910       x = r[i];
911       v = mat->v + i*m;
912       for (j=0; j<m; j++) { (*v++) *= x;}
913     }
914     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
915     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
916   }
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "MatNorm_MPIDense"
922 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
923 {
924   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
925   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
926   PetscErrorCode ierr;
927   PetscInt       i,j;
928   PetscReal      sum = 0.0;
929   PetscScalar    *v = mat->v;
930 
931   PetscFunctionBegin;
932   if (mdn->size == 1) {
933     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
934   } else {
935     if (type == NORM_FROBENIUS) {
936       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
937 #if defined(PETSC_USE_COMPLEX)
938         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
939 #else
940         sum += (*v)*(*v); v++;
941 #endif
942       }
943       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
944       *nrm = sqrt(*nrm);
945       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
946     } else if (type == NORM_1) {
947       PetscReal *tmp,*tmp2;
948       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
949       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
950       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
951       *nrm = 0.0;
952       v = mat->v;
953       for (j=0; j<mdn->A->cmap->n; j++) {
954         for (i=0; i<mdn->A->rmap->n; i++) {
955           tmp[j] += PetscAbsScalar(*v);  v++;
956         }
957       }
958       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
959       for (j=0; j<A->cmap->N; j++) {
960         if (tmp2[j] > *nrm) *nrm = tmp2[j];
961       }
962       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
963       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
964     } else if (type == NORM_INFINITY) { /* max row norm */
965       PetscReal ntemp;
966       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
967       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
968     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
969   }
970   PetscFunctionReturn(0);
971 }
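/*
   Norms computed above (stated for reference): the Frobenius norm is
   sqrt(sum_ij |a_ij|^2), accumulated locally and combined with MPI_Allreduce;
   NORM_1 is the maximum column sum of absolute values (column sums are computed
   locally and then reduced); NORM_INFINITY is the maximum row sum, which is
   local to the owning process and therefore only needs a global maximum.
*/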
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatTranspose_MPIDense"
975 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
976 {
977   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
978   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
979   Mat            B;
980   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
981   PetscErrorCode ierr;
982   PetscInt       j,i;
983   PetscScalar    *v;
984 
985   PetscFunctionBegin;
986   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
987   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
988     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
989     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
990     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
991     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
992   } else {
993     B = *matout;
994   }
995 
996   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
997   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
998   for (i=0; i<m; i++) rwork[i] = rstart + i;
999   for (j=0; j<n; j++) {
1000     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
1001     v   += m;
1002   }
1003   ierr = PetscFree(rwork);CHKERRQ(ierr);
1004   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1006   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1007     *matout = B;
1008   } else {
1009     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 
1015 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1016 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1020 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1021 {
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #if defined(PETSC_HAVE_PLAPACK)
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1033 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1034 {
1035   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1036   PetscErrorCode ierr;
1037   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1038   PetscScalar    *array;
1039   PetscReal      one = 1.0;
1040 
1041   PetscFunctionBegin;
1042   /* Copy A into lu->A (stored in F) */
1043   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1044   ierr = PLA_API_begin();CHKERRQ(ierr);
1045   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1046   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1047   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1048   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1049   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1050   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1051   ierr = PLA_API_end();CHKERRQ(ierr);
1052   lu->rstart = rstart;
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1058 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1059 {
1060   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1061   PetscErrorCode ierr;
1062   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1063   PetscScalar    *array;
1064   PetscReal      one = 1.0;
1065 
1066   PetscFunctionBegin;
1067   /* Copy lu->A (stored in F) back into the PETSc matrix A */
1068   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1069   ierr = PLA_API_begin();CHKERRQ(ierr);
1070   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1071   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1072   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1073   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1074   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1075   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1076   ierr = PLA_API_end();CHKERRQ(ierr);
1077   lu->rstart = rstart;
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1083 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1084 {
1085   PetscErrorCode ierr;
1086   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1087   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1088   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1089   PLA_Obj        alpha = NULL,beta = NULL;
1090 
1091   PetscFunctionBegin;
1092   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1093   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1094 
1095   /*
1096   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1097   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1098   */
1099 
1100   /* do the multiply in PLA  */
1101   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1102   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1103   CHKMEMQ;
1104 
1105   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1106   CHKMEMQ;
1107   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1108   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1109 
1110   /*
1111   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1112   */
1113   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1119 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1120 {
1121   PetscErrorCode ierr;
1122   PetscInt       m=A->rmap->n,n=B->cmap->n;
1123   Mat            Cmat;
1124 
1125   PetscFunctionBegin;
1126   if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1127   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1128   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1129   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1130   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1131   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1132   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1133 
1134   *C = Cmat;
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1140 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1141 {
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145   if (scall == MAT_INITIAL_MATRIX){
1146     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1147   }
1148   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatSolve_MPIDense"
1154 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1155 {
1156   MPI_Comm       comm = ((PetscObject)A)->comm;
1157   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1158   PetscErrorCode ierr;
1159   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1160   PetscScalar    *array;
1161   PetscReal      one = 1.0;
1162   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1163   PLA_Obj        v_pla = NULL;
1164   PetscScalar    *loc_buf;
1165   Vec            loc_x;
1166 
1167   PetscFunctionBegin;
1168   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1169   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1170 
1171   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1172   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1173   PLA_Obj_set_to_zero(v_pla);
1174 
1175   /* Copy b into rhs_pla */
1176   PLA_API_begin();
1177   PLA_Obj_API_open(v_pla);
1178   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1179   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1180   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1181   PLA_Obj_API_close(v_pla);
1182   PLA_API_end();
1183 
1184   if (A->factortype == MAT_FACTOR_LU){
1185     /* Apply the permutations to the right hand sides */
1186     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1187 
1188     /* Solve L y = b, overwriting b with y */
1189     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1190 
1191     /* Solve U x = y (=b), overwriting b with x */
1192     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1193   } else { /* MAT_FACTOR_CHOLESKY */
1194     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1195     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1196                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1197   }
1198 
1199   /* Copy PLAPACK x into Petsc vector x  */
1200   PLA_Obj_local_length(v_pla, &loc_m);
1201   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1202   PLA_Obj_local_stride(v_pla, &loc_stride);
1203   /*
1204     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1205   */
1206   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1207   if (!lu->pla_solved){
1208 
1209     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1210     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1211 
1212     /* Create index sets and a scatter context for the VecScatter below */
1213     PLA_Obj_local_length(v_pla, &loc_m);
1214     PLA_Obj_local_stride(v_pla, &loc_stride);
1215     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1216 
1217     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1218     for (i=0; i<loc_m; i+=lu->nb){
1219       j = 0;
1220       while (j < lu->nb && i+j < loc_m){
1221         idx_petsc[i+j] = rstart + j; j++;
1222       }
1223       rstart += size*lu->nb;
1224     }
1225 
1226     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1227 
1228     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
1229     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
1230     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1231     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1232   }
1233   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235 
1236   /* Free data */
1237   ierr = VecDestroy(&loc_x);CHKERRQ(ierr);
1238   PLA_Obj_free(&v_pla);
1239 
1240   lu->pla_solved = PETSC_TRUE;
1241   PetscFunctionReturn(0);
1242 }
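/*
   Solve sequence used above (a summary, not additional code): for an LU
   factorization the row pivots are applied to b, then L y = P b and U x = y are
   solved with PLA_Trsv(); for Cholesky, L y = b and then L^T x = y (conjugate
   transpose in the complex case).  The PLAPACK solution vector is then scattered
   back into the PETSc layout via lu->ctx.
*/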
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1246 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1247 {
1248   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1249   PetscErrorCode ierr;
1250   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1251   PetscInt       info_pla=0;
1252   PetscScalar    *array,one = 1.0;
1253 
1254   PetscFunctionBegin;
1255   if (lu->mstruct == SAME_NONZERO_PATTERN){
1256     PLA_Obj_free(&lu->A);
1257     PLA_Obj_free (&lu->pivots);
1258   }
1259   /* Create PLAPACK matrix object */
1260   lu->A = NULL; lu->pivots = NULL;
1261   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1262   PLA_Obj_set_to_zero(lu->A);
1263   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1264 
1265   /* Copy A into lu->A */
1266   PLA_API_begin();
1267   PLA_Obj_API_open(lu->A);
1268   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1269   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1270   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1271   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1272   PLA_Obj_API_close(lu->A);
1273   PLA_API_end();
1274 
1275   /* Factor P A = L U, overwriting the lower triangular portion of A with L and the upper triangular portion with U */
1276   info_pla = PLA_LU(lu->A,lu->pivots);
1277   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1278 
1279   lu->rstart         = rstart;
1280   lu->mstruct        = SAME_NONZERO_PATTERN;
1281   F->ops->solve      = MatSolve_MPIDense;
1282   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1288 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1289 {
1290   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1291   PetscErrorCode ierr;
1292   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1293   PetscInt       info_pla=0;
1294   PetscScalar    *array,one = 1.0;
1295 
1296   PetscFunctionBegin;
1297   if (lu->mstruct == SAME_NONZERO_PATTERN){
1298     PLA_Obj_free(&lu->A);
1299   }
1300   /* Create PLAPACK matrix object */
1301   lu->A      = NULL;
1302   lu->pivots = NULL;
1303   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1304 
1305   /* Copy A into lu->A */
1306   PLA_API_begin();
1307   PLA_Obj_API_open(lu->A);
1308   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1309   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1310   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1311   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1312   PLA_Obj_API_close(lu->A);
1313   PLA_API_end();
1314 
1315   /* Compute the Cholesky factorization of A, storing the lower triangular factor in lu->A */
1316   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1317   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1318 
1319   lu->rstart         = rstart;
1320   lu->mstruct        = SAME_NONZERO_PATTERN;
1321   F->ops->solve      = MatSolve_MPIDense;
1322   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 /* Note the Petsc perm permutation is ignored */
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1329 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1330 {
1331   PetscErrorCode ierr;
1332   PetscBool      issymmetric,set;
1333 
1334   PetscFunctionBegin;
1335   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1336   if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1337   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 /* Note the Petsc r and c permutations are ignored */
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1344 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1345 {
1346   PetscErrorCode ierr;
1347   PetscInt       M = A->rmap->N;
1348   Mat_Plapack    *lu;
1349 
1350   PetscFunctionBegin;
1351   lu = (Mat_Plapack*)F->spptr;
1352   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1353   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 EXTERN_C_BEGIN
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1360 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1361 {
1362   PetscFunctionBegin;
1363   *type = MATSOLVERPLAPACK;
1364   PetscFunctionReturn(0);
1365 }
1366 EXTERN_C_END
1367 
1368 EXTERN_C_BEGIN
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1371 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1372 {
1373   PetscErrorCode ierr;
1374   Mat_Plapack    *lu;
1375   PetscMPIInt    size;
1376   PetscInt       M=A->rmap->N;
1377 
1378   PetscFunctionBegin;
1379   /* Create the factorization matrix */
1380   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1381   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1382   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1383   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1384   (*F)->spptr = (void*)lu;
1385 
1386   /* Set default Plapack parameters */
1387   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1388   lu->nb = M/size;
1389   if (M - lu->nb*size) lu->nb++; /* round up so each process owns one contiguous block (no cyclic distribution) */
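  /* e.g. M = 10 global rows on 4 processes gives nb = 3, i.e. nb = ceil(M/size) */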
1390 
1391   /* Set runtime options */
1392   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1393     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1394   PetscOptionsEnd();
1395 
1396   /* Create object distribution template */
1397   lu->templ = NULL;
1398   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1399 
1400   /* Set the datatype */
1401 #if defined(PETSC_USE_COMPLEX)
1402   lu->datatype = MPI_DOUBLE_COMPLEX;
1403 #else
1404   lu->datatype = MPI_DOUBLE;
1405 #endif
1406 
1407   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1408 
1409 
1410   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1411   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1412 
1413   if (ftype == MAT_FACTOR_LU) {
1414     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1415   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1416     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1417   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1418   (*F)->factortype = ftype;
1419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 EXTERN_C_END
1423 #endif
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1427 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1428 {
1429 #if defined(PETSC_HAVE_PLAPACK)
1430   PetscErrorCode ierr;
1431 #endif
1432 
1433   PetscFunctionBegin;
1434 #if defined(PETSC_HAVE_PLAPACK)
1435   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1436 #else
1437   SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1438 #endif
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatAXPY_MPIDense"
1444 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1445 {
1446   PetscErrorCode ierr;
1447   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1448 
1449   PetscFunctionBegin;
1450   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatConjugate_MPIDense"
1456 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1457 {
1458   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1459   PetscErrorCode ierr;
1460 
1461   PetscFunctionBegin;
1462   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatRealPart_MPIDense"
1468 PetscErrorCode MatRealPart_MPIDense(Mat A)
1469 {
1470   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1471   PetscErrorCode ierr;
1472 
1473   PetscFunctionBegin;
1474   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1480 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1481 {
1482   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1483   PetscErrorCode ierr;
1484 
1485   PetscFunctionBegin;
1486   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 /* -------------------------------------------------------------------*/
1491 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1492        MatGetRow_MPIDense,
1493        MatRestoreRow_MPIDense,
1494        MatMult_MPIDense,
1495 /* 4*/ MatMultAdd_MPIDense,
1496        MatMultTranspose_MPIDense,
1497        MatMultTransposeAdd_MPIDense,
1498        0,
1499        0,
1500        0,
1501 /*10*/ 0,
1502        0,
1503        0,
1504        0,
1505        MatTranspose_MPIDense,
1506 /*15*/ MatGetInfo_MPIDense,
1507        MatEqual_MPIDense,
1508        MatGetDiagonal_MPIDense,
1509        MatDiagonalScale_MPIDense,
1510        MatNorm_MPIDense,
1511 /*20*/ MatAssemblyBegin_MPIDense,
1512        MatAssemblyEnd_MPIDense,
1513        MatSetOption_MPIDense,
1514        MatZeroEntries_MPIDense,
1515 /*24*/ MatZeroRows_MPIDense,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*29*/ MatSetUpPreallocation_MPIDense,
1521        0,
1522        0,
1523        MatGetArray_MPIDense,
1524        MatRestoreArray_MPIDense,
1525 /*34*/ MatDuplicate_MPIDense,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*39*/ MatAXPY_MPIDense,
1531        MatGetSubMatrices_MPIDense,
1532        0,
1533        MatGetValues_MPIDense,
1534        0,
1535 /*44*/ 0,
1536        MatScale_MPIDense,
1537        0,
1538        0,
1539        0,
1540 /*49*/ 0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*54*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*59*/ MatGetSubMatrix_MPIDense,
1551        MatDestroy_MPIDense,
1552        MatView_MPIDense,
1553        0,
1554        0,
1555 /*64*/ 0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*69*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*74*/ 0,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*79*/ 0,
1571        0,
1572        0,
1573        0,
1574 /*83*/ MatLoad_MPIDense,
1575        0,
1576        0,
1577        0,
1578        0,
1579        0,
1580 /*89*/
1581 #if defined(PETSC_HAVE_PLAPACK)
1582        MatMatMult_MPIDense_MPIDense,
1583        MatMatMultSymbolic_MPIDense_MPIDense,
1584        MatMatMultNumeric_MPIDense_MPIDense,
1585 #else
1586        0,
1587        0,
1588        0,
1589 #endif
1590        0,
1591        0,
1592 /*94*/ 0,
1593        0,
1594        0,
1595        0,
1596        0,
1597 /*99*/ 0,
1598        0,
1599        0,
1600        MatConjugate_MPIDense,
1601        0,
1602 /*104*/0,
1603        MatRealPart_MPIDense,
1604        MatImaginaryPart_MPIDense,
1605        0,
1606        0,
1607 /*109*/0,
1608        0,
1609        0,
1610        0,
1611        0,
1612 /*114*/0,
1613        0,
1614        0,
1615        0,
1616        0,
1617 /*119*/0,
1618        0,
1619        0,
1620        0
1621 };
1622 
1623 EXTERN_C_BEGIN
1624 #undef __FUNCT__
1625 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1626 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1627 {
1628   Mat_MPIDense   *a;
1629   PetscErrorCode ierr;
1630 
1631   PetscFunctionBegin;
1632   mat->preallocated = PETSC_TRUE;
1633   /* Note: for now, when data is specified, we assume the user correctly
1634    allocates the local dense storage space.  We should add error checking. */
1635 
1636   a    = (Mat_MPIDense*)mat->data;
1637   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1638   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1639   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1640   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1641   a->nvec = mat->cmap->n;
1642 
1643   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1644   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1645   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1646   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1647   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1648   PetscFunctionReturn(0);
1649 }
1650 EXTERN_C_END
1651 
1652 /*MC
1653    MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1654 
1655   Run ./configure with the option --download-plapack to build PETSc with PLAPACK support.
1656 
1657 
1658   Options Database Keys:
1659 + -mat_plapack_nprows <n> - number of rows in processor partition
1660 . -mat_plapack_npcols <n> - number of columns in processor partition
1661 . -mat_plapack_nb <n> - block size of template vector
1662 . -mat_plapack_nb_alg <n> - algorithmic block size
1663 - -mat_plapack_ckerror <n> - error checking flag
1664 
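  Once PETSc is built with PLAPACK, this solver can be selected at runtime with
  -pc_factor_mat_solver_package plapack, or from code (a minimal sketch assuming an existing KSP object ksp;
  error checking omitted):
.vb
  KSPGetPC(ksp,&pc);
  PCSetType(pc,PCLU);                               /* or PCCHOLESKY */
  PCFactorSetMatSolverPackage(pc,MATSOLVERPLAPACK);
.ve
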
1665    Level: intermediate
1666 
1667 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1668 
1669 M*/
1670 
1671 EXTERN_C_BEGIN
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatCreate_MPIDense"
1674 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1675 {
1676   Mat_MPIDense   *a;
1677   PetscErrorCode ierr;
1678 
1679   PetscFunctionBegin;
1680   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1681   mat->data         = (void*)a;
1682   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1683 
1684   mat->insertmode = NOT_SET_VALUES;
1685   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1686   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1687 
1688   /* build cache for off array entries formed */
1689   a->donotstash = PETSC_FALSE;
1690   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1691 
1692   /* stuff used for matrix vector multiply */
1693   a->lvec        = 0;
1694   a->Mvctx       = 0;
1695   a->roworiented = PETSC_TRUE;
1696 
1697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1698                                      "MatGetDiagonalBlock_MPIDense",
1699                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1701                                      "MatMPIDenseSetPreallocation_MPIDense",
1702                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1704                                      "MatMatMult_MPIAIJ_MPIDense",
1705                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1707                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1708                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1710                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1711                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1712   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1713                                      "MatGetFactor_mpidense_petsc",
1714                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1715 #if defined(PETSC_HAVE_PLAPACK)
1716   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1717                                      "MatGetFactor_mpidense_plapack",
1718                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1719   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1720 #endif
1721   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1722 
1723   PetscFunctionReturn(0);
1724 }
1725 EXTERN_C_END
1726 
1727 /*MC
1728    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1729 
1730    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1731    and MATMPIDENSE otherwise.
1732 
1733    Options Database Keys:
1734 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1735 
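  A minimal creation sketch (the global dimensions 100x100 are illustrative; error checking omitted):
.vb
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
  MatSetFromOptions(A);   /* with -mat_type dense this becomes MATSEQDENSE or MATMPIDENSE */
.ve
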
1736   Level: beginner
1737 
1738 
1739 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1740 M*/
1741 
1742 EXTERN_C_BEGIN
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatCreate_Dense"
1745 PetscErrorCode  MatCreate_Dense(Mat A)
1746 {
1747   PetscErrorCode ierr;
1748   PetscMPIInt    size;
1749 
1750   PetscFunctionBegin;
1751   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1752   if (size == 1) {
1753     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1754   } else {
1755     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 EXTERN_C_END
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1763 /*@C
1764    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1765 
1766    Not collective
1767 
1768    Input Parameters:
1769 +  A - the matrix
1770 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1771    to control all matrix memory allocation.
1772 
1773    Notes:
1774    The dense format is fully compatible with standard Fortran 77
1775    storage by columns.
1776 
1777    The data input variable is intended primarily for Fortran programmers
1778    who wish to allocate their own matrix memory space.  Most users should
1779    set data=PETSC_NULL.
1780 
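   A minimal usage sketch with PETSc managing the storage (the sizes are illustrative; error checking omitted):
.vb
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetType(A,MATMPIDENSE);
   MatMPIDenseSetPreallocation(A,PETSC_NULL);
.ve
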
1781    Level: intermediate
1782 
1783 .keywords: matrix,dense, parallel
1784 
1785 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1786 @*/
1787 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1788 {
1789   PetscErrorCode ierr;
1790 
1791   PetscFunctionBegin;
1792   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1793   PetscFunctionReturn(0);
1794 }
1795 
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatCreateMPIDense"
1798 /*@C
1799    MatCreateMPIDense - Creates a parallel matrix in dense format.
1800 
1801    Collective on MPI_Comm
1802 
1803    Input Parameters:
1804 +  comm - MPI communicator
1805 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1806 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1807 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1808 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1809 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1810    to control all matrix memory allocation.
1811 
1812    Output Parameter:
1813 .  A - the matrix
1814 
1815    Notes:
1816    The dense format is fully compatible with standard Fortran 77
1817    storage by columns.
1818 
1819    The data input variable is intended primarily for Fortran programmers
1820    who wish to allocate their own matrix memory space.  Most users should
1821    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1822 
1823    The user MUST specify either the local or global matrix dimensions
1824    (possibly both).
1825 
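   A minimal usage sketch (the global dimensions are illustrative; error checking omitted):
.vb
   MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);
   /* fill with MatSetValues(), then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
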
1826    Level: intermediate
1827 
1828 .keywords: matrix,dense, parallel
1829 
1830 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1831 @*/
1832 PetscErrorCode  MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1833 {
1834   PetscErrorCode ierr;
1835   PetscMPIInt    size;
1836 
1837   PetscFunctionBegin;
1838   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1839   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1840   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1841   if (size > 1) {
1842     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1843     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1844   } else {
1845     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1846     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1847   }
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatDuplicate_MPIDense"
1853 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1854 {
1855   Mat            mat;
1856   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1857   PetscErrorCode ierr;
1858 
1859   PetscFunctionBegin;
1860   *newmat       = 0;
1861   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1862   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1863   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1864   a                 = (Mat_MPIDense*)mat->data;
1865   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1866 
1867   mat->factortype   = A->factortype;
1868   mat->assembled    = PETSC_TRUE;
1869   mat->preallocated = PETSC_TRUE;
1870 
1871   a->size           = oldmat->size;
1872   a->rank           = oldmat->rank;
1873   mat->insertmode   = NOT_SET_VALUES;
1874   a->nvec           = oldmat->nvec;
1875   a->donotstash     = oldmat->donotstash;
1876 
1877   ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
1878   ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);
1879 
1880   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1881   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1882   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1883 
1884   *newmat = mat;
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1890 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1891 {
1892   PetscErrorCode ierr;
1893   PetscMPIInt    rank,size;
1894   PetscInt       *rowners,i,m,nz,j;
1895   PetscScalar    *array,*vals,*vals_ptr;
1896   MPI_Status     status;
1897 
1898   PetscFunctionBegin;
1899   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1900   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1901 
1902   /* determine ownership of all rows */
1903   if (newmat->rmap->n < 0) m          = M/size + ((M % size) > rank);
1904   else m = newmat->rmap->n;
1905   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1906   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1907   rowners[0] = 0;
1908   for (i=2; i<=size; i++) {
1909     rowners[i] += rowners[i-1];
1910   }
1911 
1912   if (!sizesset) {
1913     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1914   }
1915   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1916   ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);
1917 
1918   if (!rank) {
1919     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1920 
1921     /* read in my part of the matrix numerical values  */
1922     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1923 
1924     /* insert into matrix by row (this is why we cannot read directly into the array) */
1925     vals_ptr = vals;
1926     for (i=0; i<m; i++) {
1927       for (j=0; j<N; j++) {
1928         array[i + j*m] = *vals_ptr++;
1929       }
1930     }
1931 
1932     /* read in the other processors' parts and ship them out */
1933     for (i=1; i<size; i++) {
1934       nz   = (rowners[i+1] - rowners[i])*N;
1935       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1936       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1937     }
1938   } else {
1939     /* receive numeric values */
1940     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1941 
1942     /* receive message of values*/
1943     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);
1944 
1945     /* insert into matrix by row (this is why we cannot read directly into the array) */
1946     vals_ptr = vals;
1947     for (i=0; i<m; i++) {
1948       for (j=0; j<N; j++) {
1949         array[i + j*m] = *vals_ptr++;
1950       }
1951     }
1952   }
1953   ierr = PetscFree(rowners);CHKERRQ(ierr);
1954   ierr = PetscFree(vals);CHKERRQ(ierr);
1955   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatLoad_MPIDense"
1962 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1963 {
1964   PetscScalar    *vals,*svals;
1965   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1966   MPI_Status     status;
1967   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1968   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1969   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1970   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1971   int            fd;
1972   PetscErrorCode ierr;
1973 
1974   PetscFunctionBegin;
1975   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1976   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1977   if (!rank) {
1978     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1979     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1980     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1981   }
1982   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1983 
1984   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1985   M = header[1]; N = header[2]; nz = header[3];
1986 
1987   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1988   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1989   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1990 
1991   /* If global sizes are set, check if they are consistent with that given in the file */
1992   if (sizesset) {
1993     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1994   }
1995   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1996   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1997 
1998   /*
1999        Handle case where matrix is stored on disk as a dense matrix
2000   */
2001   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
2002     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
2003     PetscFunctionReturn(0);
2004   }
2005 
2006   /* determine ownership of all rows */
2007   if (newmat->rmap->n < 0) m          = PetscMPIIntCast(M/size + ((M % size) > rank));
2008   else m = PetscMPIIntCast(newmat->rmap->n);
2009   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2010   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2011   rowners[0] = 0;
2012   for (i=2; i<=size; i++) {
2013     rowners[i] += rowners[i-1];
2014   }
2015   rstart = rowners[rank];
2016   rend   = rowners[rank+1];
2017 
2018   /* distribute row lengths to all processors */
2019   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
2020   if (!rank) {
2021     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2022     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2023     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2024     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2025     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2026     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2027   } else {
2028     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2029   }
2030 
2031   if (!rank) {
2032     /* calculate the number of nonzeros on each processor */
2033     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2034     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2035     for (i=0; i<size; i++) {
2036       for (j=rowners[i]; j< rowners[i+1]; j++) {
2037         procsnz[i] += rowlengths[j];
2038       }
2039     }
2040     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2041 
2042     /* determine max buffer needed and allocate it */
2043     maxnz = 0;
2044     for (i=0; i<size; i++) {
2045       maxnz = PetscMax(maxnz,procsnz[i]);
2046     }
2047     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2048 
2049     /* read in my part of the matrix column indices  */
2050     nz = procsnz[0];
2051     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2052     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2053 
2054     /* read in everyone else's part and ship it off */
2055     for (i=1; i<size; i++) {
2056       nz   = procsnz[i];
2057       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2058       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2059     }
2060     ierr = PetscFree(cols);CHKERRQ(ierr);
2061   } else {
2062     /* determine buffer space needed for message */
2063     nz = 0;
2064     for (i=0; i<m; i++) {
2065       nz += ourlens[i];
2066     }
2067     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2068 
2069     /* receive message of column indices*/
2070     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2071     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2072     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2073   }
2074 
2075   /* loop over local rows, determining number of off diagonal entries */
2076   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2077   jj = 0;
2078   for (i=0; i<m; i++) {
2079     for (j=0; j<ourlens[i]; j++) {
2080       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2081       jj++;
2082     }
2083   }
2084 
2085   /* create our matrix */
2086   for (i=0; i<m; i++) {
2087     ourlens[i] -= offlens[i];
2088   }
2089 
2090   if (!sizesset) {
2091     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2092   }
2093   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2094   for (i=0; i<m; i++) {
2095     ourlens[i] += offlens[i];
2096   }
2097 
2098   if (!rank) {
2099     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2100 
2101     /* read in my part of the matrix numerical values  */
2102     nz = procsnz[0];
2103     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2104 
2105     /* insert into matrix */
2106     jj      = rstart;
2107     smycols = mycols;
2108     svals   = vals;
2109     for (i=0; i<m; i++) {
2110       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2111       smycols += ourlens[i];
2112       svals   += ourlens[i];
2113       jj++;
2114     }
2115 
2116     /* read in the other processors' parts and ship them out */
2117     for (i=1; i<size; i++) {
2118       nz   = procsnz[i];
2119       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2120       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2121     }
2122     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2123   } else {
2124     /* receive numeric values */
2125     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2126 
2127     /* receive message of values*/
2128     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2129     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2130     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2131 
2132     /* insert into matrix */
2133     jj      = rstart;
2134     smycols = mycols;
2135     svals   = vals;
2136     for (i=0; i<m; i++) {
2137       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2138       smycols += ourlens[i];
2139       svals   += ourlens[i];
2140       jj++;
2141     }
2142   }
2143   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2144   ierr = PetscFree(vals);CHKERRQ(ierr);
2145   ierr = PetscFree(mycols);CHKERRQ(ierr);
2146   ierr = PetscFree(rowners);CHKERRQ(ierr);
2147 
2148   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatEqual_MPIDense"
2155 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2156 {
2157   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2158   Mat            a,b;
2159   PetscBool      flg;
2160   PetscErrorCode ierr;
2161 
2162   PetscFunctionBegin;
2163   a = matA->A;
2164   b = matB->A;
2165   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2166   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #if defined(PETSC_HAVE_PLAPACK)
2171 
2172 #undef __FUNCT__
2173 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2174 /*@C
2175   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2176   Level: developer
2177 
2178 .keywords: Petsc, destroy, package, PLAPACK
2179 .seealso: PetscFinalize()
2180 @*/
2181 PetscErrorCode  PetscPLAPACKFinalizePackage(void)
2182 {
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr = PLA_Finalize();CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2192 /*@C
2193   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2194   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2195 
2196   Input Parameter:
2197 .   comm - the communicator the matrix lives on
2198 
2199   Level: developer
2200 
2201    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2202   same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2203   the matrices with which PLAPACK is initialized (that is, the first matrices created) live on subcommunicators of MPI_COMM_WORLD,
2204   these subcommunicators cannot overlap.
2205 
2206 .keywords: Petsc, initialize, package, PLAPACK
2207 .seealso: PetscSysInitializePackage(), PetscInitialize()
2208 @*/
2209 PetscErrorCode  PetscPLAPACKInitializePackage(MPI_Comm comm)
2210 {
2211   PetscMPIInt    size;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   if (!PLA_Initialized(PETSC_NULL)) {
2216 
2217     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2218     Plapack_nprows = 1;
2219     Plapack_npcols = size;
2220 
2221     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2222       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2223       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2224 #if defined(PETSC_USE_DEBUG)
2225       Plapack_ierror = 3;
2226 #else
2227       Plapack_ierror = 0;
2228 #endif
2229       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2230       if (Plapack_ierror) {
2231         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2232       } else {
2233         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2234       }
2235 
2236       Plapack_nb_alg = 0;
2237       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2238       if (Plapack_nb_alg) {
2239         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2240       }
2241     PetscOptionsEnd();
2242 
2243     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2244     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2245     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2246   }
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #endif
2251