xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision fcb1c9af1a39adbb75e1011fa083d9d197867fec)
1 
2 /*
3    Basic functions for parallel dense (MPI dense) matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #if defined(PETSC_HAVE_PLAPACK)
9 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
10 static MPI_Comm Plapack_comm_2d;
11 EXTERN_C_BEGIN
12 #include <PLA.h>
13 EXTERN_C_END
14 
15 typedef struct {
16   PLA_Obj        A,pivots;
17   PLA_Template   templ;
18   MPI_Datatype   datatype;
19   PetscInt       nb,rstart;
20   VecScatter     ctx;
21   IS             is_pla,is_petsc;
22   PetscBool      pla_solved;
23   MatStructure   mstruct;
24 } Mat_Plapack;
25 #endif
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatDenseGetLocalMatrix"
29 /*@
30 
31       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
32               matrix that stores the locally owned entries. For a sequential matrix it returns the matrix itself.
33 
34     Input Parameter:
35 .      A - the Seq or MPI dense matrix
36 
37     Output Parameter:
38 .      B - the inner matrix
39 
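    Example Usage:
    A minimal sketch; the returned B references the internal storage and must not be destroyed by the caller:
.vb
      Mat B;
      MatDenseGetLocalMatrix(A,&B);
.ve
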
40     Level: intermediate
41 
42 @*/
43 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
44 {
45   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
46   PetscErrorCode ierr;
47   PetscBool      flg;
48 
49   PetscFunctionBegin;
50   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
51   if (flg) {
52     *B = mat->A;
53   } else {
54     *B = A;
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatGetRow_MPIDense"
61 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
62 {
63   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
64   PetscErrorCode ierr;
65   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
66 
67   PetscFunctionBegin;
68   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows supported");
69   lrow = row - rstart;
70   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 
74 #undef __FUNCT__
75 #define __FUNCT__ "MatRestoreRow_MPIDense"
76 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
77 {
78   PetscErrorCode ierr;
79 
80   PetscFunctionBegin;
81   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
82   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
83   PetscFunctionReturn(0);
84 }
85 
86 EXTERN_C_BEGIN
87 #undef __FUNCT__
88 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
89 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *B)
90 {
91   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
92   PetscErrorCode ierr;
93   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
94   PetscScalar    *array;
95   MPI_Comm       comm;
96 
97   PetscFunctionBegin;
98   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
99 
100   /* The reuse aspect is not implemented efficiently */
101   if (reuse) { ierr = MatDestroy(B);CHKERRQ(ierr);}
102 
103   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
104   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
105   ierr = MatCreate(comm,B);CHKERRQ(ierr);
106   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
107   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
108   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
109   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
110   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
111   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112 
113   *iscopy = PETSC_TRUE;
114   PetscFunctionReturn(0);
115 }
116 EXTERN_C_END
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPIDense"
120 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
123   PetscErrorCode ierr;
124   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
125   PetscBool      roworiented = A->roworiented;
126 
127   PetscFunctionBegin;
128   if (v) PetscValidScalarPointer(v,6);
129   for (i=0; i<m; i++) {
130     if (idxm[i] < 0) continue;
131     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
132     if (idxm[i] >= rstart && idxm[i] < rend) {
133       row = idxm[i] - rstart;
134       if (roworiented) {
135         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
136       } else {
137         for (j=0; j<n; j++) {
138           if (idxn[j] < 0) continue;
139           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
141         }
142       }
143     } else {
144       if (!A->donotstash) {
145         if (roworiented) {
146           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
147         } else {
148           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
149         }
150       }
151     }
152   }
153   PetscFunctionReturn(0);
154 }
155 
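/*
   Values destined for rows owned by another process are stashed above and
   communicated during MatAssemblyBegin/End_MPIDense. A minimal usage sketch
   with hypothetical indices (error checking omitted); any process may set
   any global entry:

     PetscInt    row = 0,col = 0;
     PetscScalar val = 1.0;
     MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/
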
156 #undef __FUNCT__
157 #define __FUNCT__ "MatGetValues_MPIDense"
158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
159 {
160   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
163 
164   PetscFunctionBegin;
165   for (i=0; i<m; i++) {
166     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
167     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
168     if (idxm[i] >= rstart && idxm[i] < rend) {
169       row = idxm[i] - rstart;
170       for (j=0; j<n; j++) {
171         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
172         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
173         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
174       }
175     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
176   }
177   PetscFunctionReturn(0);
178 }
179 
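/*
   Unlike MatSetValues_MPIDense above, MatGetValues_MPIDense supports only
   locally owned rows. A sketch of safe usage on an assembled matrix
   (error checking omitted):

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar val;
     MatGetOwnershipRange(mat,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetValues(mat,1,&row,1,&col,&val);
     }
*/
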
180 #undef __FUNCT__
181 #define __FUNCT__ "MatGetArray_MPIDense"
182 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
183 {
184   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
185   PetscErrorCode ierr;
186 
187   PetscFunctionBegin;
188   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
189   PetscFunctionReturn(0);
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
194 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
195 {
196   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
197   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
198   PetscErrorCode ierr;
199   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
200   const PetscInt *irow,*icol;
201   PetscScalar    *av,*bv,*v = lmat->v;
202   Mat            newmat;
203   IS             iscol_local;
204 
205   PetscFunctionBegin;
206   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
207   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
208   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
209   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
210   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
211   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
212 
213   /* No parallel redistribution currently supported! Should really check each index set
214      to confirm that it is OK.  ... Currently supports only submatrices with the same row
215      partitioning as the original matrix! */
216 
217   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
218   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
219 
220   /* Check submatrix call */
221   if (scall == MAT_REUSE_MATRIX) {
222     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
223     /* Really need to test rows and column sizes! */
224     newmat = *B;
225   } else {
226     /* Create and fill new matrix */
227     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
228     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
229     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
230     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
231   }
232 
233   /* Now extract the data pointers and do the copy, column at a time */
234   newmatd = (Mat_MPIDense*)newmat->data;
235   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
236 
237   for (i=0; i<Ncols; i++) {
238     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
239     for (j=0; j<nrows; j++) {
240       *bv++ = av[irow[j] - rstart];
241     }
242   }
243 
244   /* Assemble the matrices so that the correct flags are set */
245   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
246   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247 
248   /* Free work space */
249   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
250   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
251   *B = newmat;
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatRestoreArray_MPIDense"
257 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
258 {
259   PetscFunctionBegin;
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
265 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
266 {
267   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
268   MPI_Comm       comm = ((PetscObject)mat)->comm;
269   PetscErrorCode ierr;
270   PetscInt       nstash,reallocs;
271   InsertMode     addv;
272 
273   PetscFunctionBegin;
274   /* make sure all processors are either in INSERTMODE or ADDMODE */
275   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
276   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
277   mat->insertmode = addv; /* in case this processor had no cache */
278 
279   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
280   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
281   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
287 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
288 {
289   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
290   PetscErrorCode  ierr;
291   PetscInt        i,*row,*col,flg,j,rstart,ncols;
292   PetscMPIInt     n;
293   PetscScalar     *val;
294   InsertMode      addv=mat->insertmode;
295 
296   PetscFunctionBegin;
297   /*  wait on receives */
298   while (1) {
299     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
300     if (!flg) break;
301 
302     for (i=0; i<n;) {
303       /* Now identify the consecutive vals belonging to the same row */
304       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
305       if (j < n) ncols = j-i;
306       else       ncols = n-i;
307       /* Now assemble all these values with a single function call */
308       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
309       i = j;
310     }
311   }
312   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
313 
314   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
316 
317   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
318     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "MatZeroEntries_MPIDense"
325 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
326 {
327   PetscErrorCode ierr;
328   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
329 
330   PetscFunctionBegin;
331   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 /* the code does not do the diagonal entries correctly unless the
336    matrix is square and the column and row ownerships are identical.
337    This is a BUG. The only way to fix it seems to be to access
338    the local matrix mdn->A directly and not through the MatZeroRows()
339    routine.
340 */
341 #undef __FUNCT__
342 #define __FUNCT__ "MatZeroRows_MPIDense"
343 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
344 {
345   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
346   PetscErrorCode    ierr;
347   PetscInt          i,*owners = A->rmap->range;
348   PetscInt          *nprocs,j,idx,nsends;
349   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
350   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
351   PetscInt          *lens,*lrows,*values;
352   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
353   MPI_Comm          comm = ((PetscObject)A)->comm;
354   MPI_Request       *send_waits,*recv_waits;
355   MPI_Status        recv_status,*send_status;
356   PetscBool         found;
357   const PetscScalar *xx;
358   PetscScalar       *bb;
359 
360   PetscFunctionBegin;
361   /*  first count number of contributors to each processor */
362   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
363   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
364   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
365   for (i=0; i<N; i++) {
366     idx = rows[i];
367     found = PETSC_FALSE;
368     for (j=0; j<size; j++) {
369       if (idx >= owners[j] && idx < owners[j+1]) {
370         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
371       }
372     }
373     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
374   }
375   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
376 
377   /* inform other processors of number of messages and max length*/
378   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
379 
380   /* post receives:   */
381   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
382   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
383   for (i=0; i<nrecvs; i++) {
384     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
385   }
386 
387   /* do sends:
388       1) starts[i] gives the starting index in svalues for stuff going to
389          the ith processor
390   */
391   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
392   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
393   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
394   starts[0]  = 0;
395   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
396   for (i=0; i<N; i++) {
397     svalues[starts[owner[i]]++] = rows[i];
398   }
399 
400   starts[0] = 0;
401   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
402   count = 0;
403   for (i=0; i<size; i++) {
404     if (nprocs[2*i+1]) {
405       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
406     }
407   }
408   ierr = PetscFree(starts);CHKERRQ(ierr);
409 
410   base = owners[rank];
411 
412   /*  wait on receives */
413   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
414   count  = nrecvs;
415   slen   = 0;
416   while (count) {
417     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
418     /* unpack receives into our local space */
419     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
420     source[imdex]  = recv_status.MPI_SOURCE;
421     lens[imdex]    = n;
422     slen += n;
423     count--;
424   }
425   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
426 
427   /* move the data into the send scatter */
428   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
429   count = 0;
430   for (i=0; i<nrecvs; i++) {
431     values = rvalues + i*nmax;
432     for (j=0; j<lens[i]; j++) {
433       lrows[count++] = values[j] - base;
434     }
435   }
436   ierr = PetscFree(rvalues);CHKERRQ(ierr);
437   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
438   ierr = PetscFree(owner);CHKERRQ(ierr);
439   ierr = PetscFree(nprocs);CHKERRQ(ierr);
440 
441   /* fix right hand side if needed */
442   if (x && b) {
443     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
444     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
445     for (i=0; i<slen; i++) {
446       bb[lrows[i]] = diag*xx[lrows[i]];
447     }
448     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
449     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
450   }
451 
452   /* actually zap the local rows */
453   ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
454   ierr = PetscFree(lrows);CHKERRQ(ierr);
455 
456   /* wait on sends */
457   if (nsends) {
458     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
459     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
460     ierr = PetscFree(send_status);CHKERRQ(ierr);
461   }
462   ierr = PetscFree(send_waits);CHKERRQ(ierr);
463   ierr = PetscFree(svalues);CHKERRQ(ierr);
464 
465   PetscFunctionReturn(0);
466 }
467 
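/*
   A usage sketch for the routine above with hypothetical row indices
   (error checking omitted): zero two global rows, put 1.0 on their
   diagonal, and fix the right hand side b so that b[row] = 1.0*x[row]
   for a given solution vector x:

     PetscInt rows[2] = {0,5};
     MatZeroRows(A,2,rows,1.0,x,b);

   Pass PETSC_NULL for x and b to skip the right hand side fix-up.
*/
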
468 #undef __FUNCT__
469 #define __FUNCT__ "MatMult_MPIDense"
470 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471 {
472   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
473   PetscErrorCode ierr;
474 
475   PetscFunctionBegin;
476   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatMultAdd_MPIDense"
484 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485 {
486   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
491   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatMultTranspose_MPIDense"
498 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499 {
500   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
501   PetscErrorCode ierr;
502   PetscScalar    zero = 0.0;
503 
504   PetscFunctionBegin;
505   ierr = VecSet(yy,zero);CHKERRQ(ierr);
506   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
507   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
508   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
514 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515 {
516   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
521   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
522   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
523   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatGetDiagonal_MPIDense"
529 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530 {
531   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
533   PetscErrorCode ierr;
534   PetscInt       len,i,n,m = A->rmap->n,radd;
535   PetscScalar    *x,zero = 0.0;
536 
537   PetscFunctionBegin;
538   ierr = VecSet(v,zero);CHKERRQ(ierr);
539   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
540   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
541   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
543   radd = A->rmap->rstart*m;
544   for (i=0; i<len; i++) {
545     x[i] = aloc->v[radd + i*m + i];
546   }
547   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatDestroy_MPIDense"
553 PetscErrorCode MatDestroy_MPIDense(Mat mat)
554 {
555   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
556   PetscErrorCode ierr;
557 #if defined(PETSC_HAVE_PLAPACK)
558   Mat_Plapack   *lu=(Mat_Plapack*)mat->spptr;
559 #endif
560 
561   PetscFunctionBegin;
562 
563 #if defined(PETSC_USE_LOG)
564   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
565 #endif
566   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
567   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
568   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
569   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
570 #if defined(PETSC_HAVE_PLAPACK)
571   if (lu) {
572     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
573     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
574     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
575     ierr = ISDestroy(&lu->is_pla);CHKERRQ(ierr);
576     ierr = ISDestroy(&lu->is_petsc);CHKERRQ(ierr);
577     ierr = VecScatterDestroy(&lu->ctx);CHKERRQ(ierr);
578   }
579   ierr = PetscFree(mat->spptr);CHKERRQ(ierr);
580 #endif
581 
582   ierr = PetscFree(mat->data);CHKERRQ(ierr);
583   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
584   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
586   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
587   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
589   PetscFunctionReturn(0);
590 }
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "MatView_MPIDense_Binary"
594 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
595 {
596   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
597   PetscErrorCode    ierr;
598   PetscViewerFormat format;
599   int               fd;
600   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
601   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
602   PetscScalar       *work,*v,*vv;
603   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
604   MPI_Status        status;
605 
606   PetscFunctionBegin;
607   if (mdn->size == 1) {
608     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
609   } else {
610     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
611     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
612     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
613 
614     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
615     if (format == PETSC_VIEWER_NATIVE) {
616 
617       if (!rank) {
618         /* store the matrix as a dense matrix */
619         header[0] = MAT_FILE_CLASSID;
620         header[1] = mat->rmap->N;
621         header[2] = N;
622         header[3] = MATRIX_BINARY_FORMAT_DENSE;
623         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
624 
625         /* get largest work array needed for transposing array */
626         mmax = mat->rmap->n;
627         for (i=1; i<size; i++) {
628           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
629         }
630         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
631 
632         /* write out local array, by rows */
633         m    = mat->rmap->n;
634         v    = a->v;
635         for (j=0; j<N; j++) {
636           for (i=0; i<m; i++) {
637             work[j + i*N] = *v++;
638           }
639         }
640         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
641         /* get largest work array to receive messages from other processes, excludes process zero */
642         mmax = 0;
643         for (i=1; i<size; i++) {
644           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
645         }
646         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
647         for(k = 1; k < size; k++) {
648           v    = vv;
649           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
650           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
651 
652           for(j = 0; j < N; j++) {
653             for(i = 0; i < m; i++) {
654               work[j + i*N] = *v++;
655             }
656           }
657           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
658         }
659         ierr = PetscFree(work);CHKERRQ(ierr);
660         ierr = PetscFree(vv);CHKERRQ(ierr);
661       } else {
662         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
663       }
664     } else {
665       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
666     }
667   }
668   PetscFunctionReturn(0);
669 }
670 
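/*
   As the error message above indicates, storing a parallel dense matrix in
   binary requires the native format. A minimal sketch with a hypothetical
   file name (error checking omitted):

     PetscViewer viewer;
     PetscViewerBinaryOpen(((PetscObject)mat)->comm,"dense.dat",FILE_MODE_WRITE,&viewer);
     PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);
     MatView(mat,viewer);
     PetscViewerDestroy(&viewer);
*/
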
671 #undef __FUNCT__
672 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
673 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
674 {
675   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
676   PetscErrorCode        ierr;
677   PetscMPIInt           size = mdn->size,rank = mdn->rank;
678   const PetscViewerType vtype;
679   PetscBool             iascii,isdraw;
680   PetscViewer           sviewer;
681   PetscViewerFormat     format;
682 #if defined(PETSC_HAVE_PLAPACK)
683   Mat_Plapack           *lu;
684 #endif
685 
686   PetscFunctionBegin;
687   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
688   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
689   if (iascii) {
690     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
691     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
692     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
693       MatInfo info;
694       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
695       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
696       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
697       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
698       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
699 #if defined(PETSC_HAVE_PLAPACK)
700       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
701       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
702       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
703       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
704       if (mat->factortype){
705         lu=(Mat_Plapack*)(mat->spptr);
706         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
707       }
708 #else
709       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
710 #endif
711       PetscFunctionReturn(0);
712     } else if (format == PETSC_VIEWER_ASCII_INFO) {
713       PetscFunctionReturn(0);
714     }
715   } else if (isdraw) {
716     PetscDraw  draw;
717     PetscBool  isnull;
718 
719     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
720     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
721     if (isnull) PetscFunctionReturn(0);
722   }
723 
724   if (size == 1) {
725     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
726   } else {
727     /* assemble the entire matrix onto first processor. */
728     Mat         A;
729     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
730     PetscInt    *cols;
731     PetscScalar *vals;
732 
733     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
734     if (!rank) {
735       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
736     } else {
737       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
738     }
739     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
740     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
741     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
742     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
743 
744     /* Copy the matrix ... This isn't the most efficient means,
745        but it's quick for now */
746     A->insertmode = INSERT_VALUES;
747     row = mat->rmap->rstart; m = mdn->A->rmap->n;
748     for (i=0; i<m; i++) {
749       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
750       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
751       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
752       row++;
753     }
754 
755     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
756     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
757     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
758     if (!rank) {
759       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
760       /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
761       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
762       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
763     }
764     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
765     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
766     ierr = MatDestroy(&A);CHKERRQ(ierr);
767   }
768   PetscFunctionReturn(0);
769 }
770 
771 #undef __FUNCT__
772 #define __FUNCT__ "MatView_MPIDense"
773 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
774 {
775   PetscErrorCode ierr;
776   PetscBool      iascii,isbinary,isdraw,issocket;
777 
778   PetscFunctionBegin;
779 
780   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
781   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
782   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
783   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
784 
785   if (iascii || issocket || isdraw) {
786     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
787   } else if (isbinary) {
788     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
789   } else {
790     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatGetInfo_MPIDense"
797 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
798 {
799   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
800   Mat            mdn = mat->A;
801   PetscErrorCode ierr;
802   PetscReal      isend[5],irecv[5];
803 
804   PetscFunctionBegin;
805   info->block_size     = 1.0;
806   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
807   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
808   isend[3] = info->memory;  isend[4] = info->mallocs;
809   if (flag == MAT_LOCAL) {
810     info->nz_used      = isend[0];
811     info->nz_allocated = isend[1];
812     info->nz_unneeded  = isend[2];
813     info->memory       = isend[3];
814     info->mallocs      = isend[4];
815   } else if (flag == MAT_GLOBAL_MAX) {
816     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
817     info->nz_used      = irecv[0];
818     info->nz_allocated = irecv[1];
819     info->nz_unneeded  = irecv[2];
820     info->memory       = irecv[3];
821     info->mallocs      = irecv[4];
822   } else if (flag == MAT_GLOBAL_SUM) {
823     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
824     info->nz_used      = irecv[0];
825     info->nz_allocated = irecv[1];
826     info->nz_unneeded  = irecv[2];
827     info->memory       = irecv[3];
828     info->mallocs      = irecv[4];
829   }
830   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
831   info->fill_ratio_needed = 0;
832   info->factor_mallocs    = 0;
833   PetscFunctionReturn(0);
834 }
835 
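/*
   A usage sketch for the routine above (error checking omitted): the flag
   selects per-process values (MAT_LOCAL), the maximum over all processes
   (MAT_GLOBAL_MAX), or the sum over all processes (MAT_GLOBAL_SUM):

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(((PetscObject)A)->comm,"memory used %g\n",info.memory);
*/
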
836 #undef __FUNCT__
837 #define __FUNCT__ "MatSetOption_MPIDense"
838 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
839 {
840   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
841   PetscErrorCode ierr;
842 
843   PetscFunctionBegin;
844   switch (op) {
845   case MAT_NEW_NONZERO_LOCATIONS:
846   case MAT_NEW_NONZERO_LOCATION_ERR:
847   case MAT_NEW_NONZERO_ALLOCATION_ERR:
848     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
849     break;
850   case MAT_ROW_ORIENTED:
851     a->roworiented = flg;
852     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
853     break;
854   case MAT_NEW_DIAGONALS:
855   case MAT_USE_HASH_TABLE:
856     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
857     break;
858   case MAT_IGNORE_OFF_PROC_ENTRIES:
859     a->donotstash = flg;
860     break;
861   case MAT_SYMMETRIC:
862   case MAT_STRUCTURALLY_SYMMETRIC:
863   case MAT_HERMITIAN:
864   case MAT_SYMMETRY_ETERNAL:
865   case MAT_IGNORE_LOWER_TRIANGULAR:
866     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
867     break;
868   default:
869     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
870   }
871   PetscFunctionReturn(0);
872 }
873 
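/*
   A usage sketch for the routine above (error checking omitted): switch to
   column-oriented input so a subsequent MatSetValues interprets the v[]
   array in column-major order:

     MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);
*/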
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatDiagonalScale_MPIDense"
877 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
878 {
879   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
880   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
881   PetscScalar    *l,*r,x,*v;
882   PetscErrorCode ierr;
883   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
884 
885   PetscFunctionBegin;
886   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
887   if (ll) {
888     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
889     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
890     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
891     for (i=0; i<m; i++) {
892       x = l[i];
893       v = mat->v + i;
894       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
895     }
896     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
897     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
898   }
899   if (rr) {
900     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
901     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %d != %d.", s3a, s3);
902     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
905     for (i=0; i<n; i++) {
906       x = r[i];
907       v = mat->v + i*m;
908       for (j=0; j<m; j++) { (*v++) *= x;}
909     }
910     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
911     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
912   }
913   PetscFunctionReturn(0);
914 }
915 
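/*
   The routine above computes A = diag(ll)*A*diag(rr). A minimal sketch
   (error checking omitted); ll must match the row layout of A, rr the
   column layout, and either vector may be PETSC_NULL:

     MatDiagonalScale(A,ll,rr);
*/
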
916 #undef __FUNCT__
917 #define __FUNCT__ "MatNorm_MPIDense"
918 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
919 {
920   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
921   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
922   PetscErrorCode ierr;
923   PetscInt       i,j;
924   PetscReal      sum = 0.0;
925   PetscScalar    *v = mat->v;
926 
927   PetscFunctionBegin;
928   if (mdn->size == 1) {
929     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
930   } else {
931     if (type == NORM_FROBENIUS) {
932       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
933 #if defined(PETSC_USE_COMPLEX)
934         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
935 #else
936         sum += (*v)*(*v); v++;
937 #endif
938       }
939       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
940       *nrm = PetscSqrtReal(*nrm);
941       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
942     } else if (type == NORM_1) {
943       PetscReal *tmp,*tmp2;
944       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
945       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
946       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
947       *nrm = 0.0;
948       v = mat->v;
949       for (j=0; j<mdn->A->cmap->n; j++) {
950         for (i=0; i<mdn->A->rmap->n; i++) {
951           tmp[j] += PetscAbsScalar(*v);  v++;
952         }
953       }
954       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
955       for (j=0; j<A->cmap->N; j++) {
956         if (tmp2[j] > *nrm) *nrm = tmp2[j];
957       }
958       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
959       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
960     } else if (type == NORM_INFINITY) { /* max row norm */
961       PetscReal ntemp;
962       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
963       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
964     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatTranspose_MPIDense"
971 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
972 {
973   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
974   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
975   Mat            B;
976   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
977   PetscErrorCode ierr;
978   PetscInt       j,i;
979   PetscScalar    *v;
980 
981   PetscFunctionBegin;
982   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"In-place transpose supported only for square matrices");
983   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
984     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
985     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
986     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
987     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
988   } else {
989     B = *matout;
990   }
991 
992   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
993   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
994   for (i=0; i<m; i++) rwork[i] = rstart + i;
995   for (j=0; j<n; j++) {
996     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
997     v   += m;
998   }
999   ierr = PetscFree(rwork);CHKERRQ(ierr);
1000   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1001   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1002   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1003     *matout = B;
1004   } else {
1005     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
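/*
   A usage sketch for the routine above (error checking omitted); per the
   check at its start, in-place transposition (*matout == A) is supported
   only for square matrices:

     Mat At;
     MatTranspose(A,MAT_INITIAL_MATRIX,&At);
     MatDestroy(&At);
*/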
1010 
1011 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1012 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1016 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1017 {
1018   PetscErrorCode ierr;
1019 
1020   PetscFunctionBegin;
1021   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #if defined(PETSC_HAVE_PLAPACK)
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1029 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1030 {
1031   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1032   PetscErrorCode ierr;
1033   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1034   PetscScalar    *array;
1035   PetscReal      one = 1.0;
1036 
1037   PetscFunctionBegin;
1038   /* Copy the PETSc matrix A into the PLAPACK matrix lu->A attached to F */
1039   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1040   ierr = PLA_API_begin();CHKERRQ(ierr);
1041   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1042   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1043   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1044   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1045   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1046   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1047   ierr = PLA_API_end();CHKERRQ(ierr);
1048   lu->rstart = rstart;
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1054 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1055 {
1056   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1057   PetscErrorCode ierr;
1058   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1059   PetscScalar    *array;
1060   PetscReal      one = 1.0;
1061 
1062   PetscFunctionBegin;
1063   /* Copy the PLAPACK matrix lu->A attached to F back into the PETSc matrix A */
1064   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1065   ierr = PLA_API_begin();CHKERRQ(ierr);
1066   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1067   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1068   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1069   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1070   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1071   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1072   ierr = PLA_API_end();CHKERRQ(ierr);
1073   lu->rstart = rstart;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1079 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1080 {
1081   PetscErrorCode ierr;
1082   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1083   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1084   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1085   PLA_Obj        alpha = NULL,beta = NULL;
1086 
1087   PetscFunctionBegin;
1088   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1089   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1090 
1091   /*
1092   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1093   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1094   */
1095 
1096   /* do the multiply in PLA  */
1097   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1098   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1099   CHKMEMQ;
1100 
1101   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1102   CHKMEMQ;
1103   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1104   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1105 
1106   /*
1107   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1108   */
1109   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1115 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1116 {
1117   PetscErrorCode ierr;
1118   PetscInt       m=A->rmap->n,n=B->cmap->n;
1119   Mat            Cmat;
1120 
1121   PetscFunctionBegin;
1122   if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
1123   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1124   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1125   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1126   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1127   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1128   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1129 
1130   *C = Cmat;
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1136 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1137 {
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   if (scall == MAT_INITIAL_MATRIX){
1142     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1143   }
1144   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatSolve_MPIDense"
1150 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1151 {
1152   MPI_Comm       comm = ((PetscObject)A)->comm;
1153   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1154   PetscErrorCode ierr;
1155   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1156   PetscScalar    *array;
1157   PetscReal      one = 1.0;
1158   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1159   PLA_Obj        v_pla = NULL;
1160   PetscScalar    *loc_buf;
1161   Vec            loc_x;
1162 
1163   PetscFunctionBegin;
1164   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1165   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1166 
1167   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1168   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1169   PLA_Obj_set_to_zero(v_pla);
1170 
1171   /* Copy b into the PLAPACK vector v_pla */
1172   PLA_API_begin();
1173   PLA_Obj_API_open(v_pla);
1174   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1175   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1176   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1177   PLA_Obj_API_close(v_pla);
1178   PLA_API_end();
1179 
1180   if (A->factortype == MAT_FACTOR_LU){
1181     /* Apply the permutations to the right hand sides */
1182     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1183 
1184     /* Solve L y = b, overwriting b with y */
1185     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1186 
1187     /* Solve U x = y (=b), overwriting b with x */
1188     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1189   } else { /* MAT_FACTOR_CHOLESKY */
1190     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1191     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1192                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1193   }
1194 
1195   /* Copy PLAPACK x into Petsc vector x  */
1196   PLA_Obj_local_length(v_pla, &loc_m);
1197   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1198   PLA_Obj_local_stride(v_pla, &loc_stride);
1199   /*
1200     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1201   */
1202   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1203   if (!lu->pla_solved){
1204 
1205     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1206     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1207 
1208     /* Create index sets and a scatter context for the VecScatter below */
1209     PLA_Obj_local_length(v_pla, &loc_m);
1210     PLA_Obj_local_stride(v_pla, &loc_stride);
1211     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1212 
1213     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1214     for (i=0; i<loc_m; i+=lu->nb){
1215       j = 0;
1216       while (j < lu->nb && i+j < loc_m){
1217         idx_petsc[i+j] = rstart + j; j++;
1218       }
1219       rstart += size*lu->nb;
1220     }
1221 
1222     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1223 
1224     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
1225     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
1226     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1227     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1228   }
1229   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231 
1232   /* Free data */
1233   ierr = VecDestroy(&loc_x);CHKERRQ(ierr);
1234   PLA_Obj_free(&v_pla);
1235 
1236   lu->pla_solved = PETSC_TRUE;
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1242 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1243 {
1244   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1245   PetscErrorCode ierr;
1246   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1247   PetscInt       info_pla=0;
1248   PetscScalar    *array,one = 1.0;
1249 
1250   PetscFunctionBegin;
1251   if (lu->mstruct == SAME_NONZERO_PATTERN){
1252     PLA_Obj_free(&lu->A);
1253     PLA_Obj_free (&lu->pivots);
1254   }
1255   /* Create PLAPACK matrix object */
1256   lu->A = NULL; lu->pivots = NULL;
1257   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1258   PLA_Obj_set_to_zero(lu->A);
1259   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1260 
1261   /* Copy A into lu->A */
1262   PLA_API_begin();
1263   PLA_Obj_API_open(lu->A);
1264   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1265   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1266   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1267   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1268   PLA_Obj_API_close(lu->A);
1269   PLA_API_end();
1270 
1271   /* Factor P A -> L U, overwriting the lower triangular portion of lu->A with L and the upper with U */
1272   info_pla = PLA_LU(lu->A,lu->pivots);
1273   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1274 
1275   lu->rstart         = rstart;
1276   lu->mstruct        = SAME_NONZERO_PATTERN;
1277   F->ops->solve      = MatSolve_MPIDense;
1278   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1284 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1285 {
1286   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1287   PetscErrorCode ierr;
1288   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1289   PetscInt       info_pla=0;
1290   PetscScalar    *array,one = 1.0;
1291 
1292   PetscFunctionBegin;
1293   if (lu->mstruct == SAME_NONZERO_PATTERN){
1294     PLA_Obj_free(&lu->A);
1295   }
1296   /* Create PLAPACK matrix object */
1297   lu->A      = NULL;
1298   lu->pivots = NULL;
1299   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1300 
1301   /* Copy A into lu->A */
1302   PLA_API_begin();
1303   PLA_Obj_API_open(lu->A);
1304   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1305   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1306   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1307   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1308   PLA_Obj_API_close(lu->A);
1309   PLA_API_end();
1310 
1311   /* Cholesky factor A, overwriting the lower triangular portion of lu->A with L */
1312   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1313   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1314 
1315   lu->rstart         = rstart;
1316   lu->mstruct        = SAME_NONZERO_PATTERN;
1317   F->ops->solve      = MatSolve_MPIDense;
1318   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 /* Note the Petsc perm permutation is ignored */
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1325 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1326 {
1327   PetscErrorCode ierr;
1328   PetscBool      issymmetric,set;
1329 
1330   PetscFunctionBegin;
1331   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1332   if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1333   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 /* Note the Petsc r and c permutations are ignored */
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1340 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1341 {
1342   PetscErrorCode ierr;
1343   PetscInt       M = A->rmap->N;
1344   Mat_Plapack    *lu;
1345 
1346   PetscFunctionBegin;
1347   lu = (Mat_Plapack*)F->spptr;
1348   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1349   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 EXTERN_C_BEGIN
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1356 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1357 {
1358   PetscFunctionBegin;
1359   *type = MATSOLVERPLAPACK;
1360   PetscFunctionReturn(0);
1361 }
1362 EXTERN_C_END
1363 
1364 EXTERN_C_BEGIN
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1367 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1368 {
1369   PetscErrorCode ierr;
1370   Mat_Plapack    *lu;
1371   PetscMPIInt    size;
1372   PetscInt       M=A->rmap->N;
1373 
1374   PetscFunctionBegin;
1375   /* Create the factorization matrix */
1376   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1377   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1378   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1379   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1380   (*F)->spptr = (void*)lu;
1381 
1382   /* Set default Plapack parameters */
1383   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1384   lu->nb = M/size;
1385   if (M - lu->nb*size) lu->nb++; /* round the block size up so the blocks cover all rows (no cyclic distribution) */
1386 
1387   /* Set runtime options */
1388   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1389     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1390   PetscOptionsEnd();
1391 
1392   /* Create object distribution template */
1393   lu->templ = NULL;
1394   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1395 
1396   /* Set the datatype */
1397 #if defined(PETSC_USE_COMPLEX)
1398   lu->datatype = MPI_DOUBLE_COMPLEX;
1399 #else
1400   lu->datatype = MPI_DOUBLE;
1401 #endif
1402 
1403   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1404 
1405 
1406   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1407   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1408 
1409   if (ftype == MAT_FACTOR_LU) {
1410     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1411   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1412     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1413   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1414   (*F)->factortype = ftype;
1415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 EXTERN_C_END
1419 #endif
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1423 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1424 {
1425 #if defined(PETSC_HAVE_PLAPACK)
1426   PetscErrorCode ierr;
1427 #endif
1428 
1429   PetscFunctionBegin;
1430 #if defined(PETSC_HAVE_PLAPACK)
1431   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1432 #else
1433   SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s requires the PLAPACK direct solver; reconfigure PETSc with --download-plapack",((PetscObject)A)->type_name);
1434 #endif
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatAXPY_MPIDense"
1440 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1441 {
1442   PetscErrorCode ierr;
1443   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatConjugate_MPIDense"
1452 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1453 {
1454   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1455   PetscErrorCode ierr;
1456 
1457   PetscFunctionBegin;
1458   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatRealPart_MPIDense"
1464 PetscErrorCode MatRealPart_MPIDense(Mat A)
1465 {
1466   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1467   PetscErrorCode ierr;
1468 
1469   PetscFunctionBegin;
1470   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1476 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1477 {
1478   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 /* -------------------------------------------------------------------*/
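/*
   Dispatch table mapping the generic MatXXX() operations onto their MPIDENSE
   implementations.  A 0 entry means the operation is not implemented for this
   matrix type; the numeric comments give the slot index within struct _MatOps.
*/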
1487 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1488        MatGetRow_MPIDense,
1489        MatRestoreRow_MPIDense,
1490        MatMult_MPIDense,
1491 /* 4*/ MatMultAdd_MPIDense,
1492        MatMultTranspose_MPIDense,
1493        MatMultTransposeAdd_MPIDense,
1494        0,
1495        0,
1496        0,
1497 /*10*/ 0,
1498        0,
1499        0,
1500        0,
1501        MatTranspose_MPIDense,
1502 /*15*/ MatGetInfo_MPIDense,
1503        MatEqual_MPIDense,
1504        MatGetDiagonal_MPIDense,
1505        MatDiagonalScale_MPIDense,
1506        MatNorm_MPIDense,
1507 /*20*/ MatAssemblyBegin_MPIDense,
1508        MatAssemblyEnd_MPIDense,
1509        MatSetOption_MPIDense,
1510        MatZeroEntries_MPIDense,
1511 /*24*/ MatZeroRows_MPIDense,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*29*/ MatSetUpPreallocation_MPIDense,
1517        0,
1518        0,
1519        MatGetArray_MPIDense,
1520        MatRestoreArray_MPIDense,
1521 /*34*/ MatDuplicate_MPIDense,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*39*/ MatAXPY_MPIDense,
1527        MatGetSubMatrices_MPIDense,
1528        0,
1529        MatGetValues_MPIDense,
1530        0,
1531 /*44*/ 0,
1532        MatScale_MPIDense,
1533        0,
1534        0,
1535        0,
1536 /*49*/ 0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*54*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*59*/ MatGetSubMatrix_MPIDense,
1547        MatDestroy_MPIDense,
1548        MatView_MPIDense,
1549        0,
1550        0,
1551 /*64*/ 0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*69*/ 0,
1557        0,
1558        0,
1559        0,
1560        0,
1561 /*74*/ 0,
1562        0,
1563        0,
1564        0,
1565        0,
1566 /*79*/ 0,
1567        0,
1568        0,
1569        0,
1570 /*83*/ MatLoad_MPIDense,
1571        0,
1572        0,
1573        0,
1574        0,
1575        0,
1576 /*89*/
1577 #if defined(PETSC_HAVE_PLAPACK)
1578        MatMatMult_MPIDense_MPIDense,
1579        MatMatMultSymbolic_MPIDense_MPIDense,
1580        MatMatMultNumeric_MPIDense_MPIDense,
1581 #else
1582        0,
1583        0,
1584        0,
1585 #endif
1586        0,
1587        0,
1588 /*94*/ 0,
1589        0,
1590        0,
1591        0,
1592        0,
1593 /*99*/ 0,
1594        0,
1595        0,
1596        MatConjugate_MPIDense,
1597        0,
1598 /*104*/0,
1599        MatRealPart_MPIDense,
1600        MatImaginaryPart_MPIDense,
1601        0,
1602        0,
1603 /*109*/0,
1604        0,
1605        0,
1606        0,
1607        0,
1608 /*114*/0,
1609        0,
1610        0,
1611        0,
1612        0,
1613 /*119*/0,
1614        0,
1615        0,
1616        0
1617 };
1618 
1619 EXTERN_C_BEGIN
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1622 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1623 {
1624   Mat_MPIDense   *a;
1625   PetscErrorCode ierr;
1626 
1627   PetscFunctionBegin;
1628   mat->preallocated = PETSC_TRUE;
1629   /* Note: for now, when data is specified, this assumes the user correctly
1630      allocates the local dense storage space.  We should add error checking. */
1631 
1632   a    = (Mat_MPIDense*)mat->data;
1633   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1634   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1635   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1636   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1637   a->nvec = mat->cmap->n;
1638 
1639   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1640   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1641   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1642   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1643   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 EXTERN_C_END
1647 
1648 /*MC
1649    MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1650 
1651   Run ./configure with the option --download-plapack to build PETSc with PLAPACK support.
1652
1654   Options Database Keys:
1655 + -mat_plapack_nprows <n> - number of rows in the processor partition
1656 . -mat_plapack_npcols <n> - number of columns in the processor partition
1657 . -mat_plapack_nb <n> - block size of template vector
1658 . -mat_plapack_nb_alg <n> - algorithmic block size
1659 - -mat_plapack_ckerror <n> - error checking flag
1660 
1661    Level: intermediate
1662 
1663 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1664 
1665 M*/
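/*
   A hypothetical command line selecting this solver for a direct solve (the
   executable name ./ex1 is an assumption for illustration):

     mpiexec -n 4 ./ex1 -mat_type mpidense -pc_type lu \
         -pc_factor_mat_solver_package plapack -mat_plapack_nb 64

   Equivalently, from code, MatGetFactor(A,MATSOLVERPLAPACK,MAT_FACTOR_LU,&F)
   selects the PLAPACK factorization registered below.
*/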
1666 
1667 EXTERN_C_BEGIN
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatCreate_MPIDense"
1670 PetscErrorCode  MatCreate_MPIDense(Mat mat)
1671 {
1672   Mat_MPIDense   *a;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1677   mat->data         = (void*)a;
1678   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1679 
1680   mat->insertmode = NOT_SET_VALUES;
1681   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1682   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1683 
1684   /* build cache for off array entries formed */
1685   a->donotstash = PETSC_FALSE;
1686   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1687 
1688   /* stuff used for matrix vector multiply */
1689   a->lvec        = 0;
1690   a->Mvctx       = 0;
1691   a->roworiented = PETSC_TRUE;
1692 
1693   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1694                                      "MatGetDiagonalBlock_MPIDense",
1695                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1697                                      "MatMPIDenseSetPreallocation_MPIDense",
1698                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1699   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1700                                      "MatMatMult_MPIAIJ_MPIDense",
1701                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1703                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1704                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1705   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1706                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1707                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1708   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1709                                      "MatGetFactor_mpidense_petsc",
1710                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1711 #if defined(PETSC_HAVE_PLAPACK)
1712   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1713                                      "MatGetFactor_mpidense_plapack",
1714                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1715   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1716 #endif
1717   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1718 
1719   PetscFunctionReturn(0);
1720 }
1721 EXTERN_C_END
1722 
1723 /*MC
1724    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1725 
1726    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1727    and MATMPIDENSE otherwise.
1728 
1729    Options Database Keys:
1730 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1731 
1732   Level: beginner
1733 
1734 
1735 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1736 M*/
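/*
   A minimal sketch (assumed caller code) of creating a MATDENSE matrix whose
   concrete type is chosen by the communicator size, as described above:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);

   On a single process the type resolves to MATSEQDENSE, in which case the last
   call is a no-op (it is dispatched with PetscTryMethod()) and
   MatSeqDenseSetPreallocation() should be used instead.
*/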
1737 
1738 EXTERN_C_BEGIN
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatCreate_Dense"
1741 PetscErrorCode  MatCreate_Dense(Mat A)
1742 {
1743   PetscErrorCode ierr;
1744   PetscMPIInt    size;
1745 
1746   PetscFunctionBegin;
1747   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1748   if (size == 1) {
1749     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1750   } else {
1751     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1752   }
1753   PetscFunctionReturn(0);
1754 }
1755 EXTERN_C_END
1756 
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1759 /*@C
1760    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1761 
1762    Not collective
1763 
1764    Input Parameters:
1765 +  A - the matrix
1766 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1767    to control all matrix memory allocation.
1768 
1769    Notes:
1770    The dense format is fully compatible with standard Fortran 77
1771    storage by columns.
1772 
1773    The data input variable is intended primarily for Fortran programmers
1774    who wish to allocate their own matrix memory space.  Most users should
1775    set data=PETSC_NULL.
1776 
1777    Level: intermediate
1778 
1779 .keywords: matrix,dense, parallel
1780 
1781 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1782 @*/
1783 PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1784 {
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1789   PetscFunctionReturn(0);
1790 }
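/*
   A minimal sketch (assumed caller code) of supplying user storage.  As the
   preallocation routine above shows, the local block is an m-by-N column-major
   array, where m is the number of local rows and N the number of global
   columns; the sizes below are assumptions for illustration:

     Mat         A;
     PetscInt    i,j,m = 5,N = 20;
     PetscScalar *data;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     for (j=0; j<N; j++) {
       for (i=0; i<m; i++) data[i+j*m] = 0.0;
     }
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/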
1791 
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatCreateMPIDense"
1794 /*@C
1795    MatCreateMPIDense - Creates a parallel matrix in dense format.
1796 
1797    Collective on MPI_Comm
1798 
1799    Input Parameters:
1800 +  comm - MPI communicator
1801 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1802 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1803 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1804 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1805 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1806    to control all matrix memory allocation.
1807 
1808    Output Parameter:
1809 .  A - the matrix
1810 
1811    Notes:
1812    The dense format is fully compatible with standard Fortran 77
1813    storage by columns.
1814 
1815    The data input variable is intended primarily for Fortran programmers
1816    who wish to allocate their own matrix memory space.  Most users should
1817    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1818 
1819    The user MUST specify either the local or global matrix dimensions
1820    (possibly both).
1821 
1822    Level: intermediate
1823 
1824 .keywords: matrix,dense, parallel
1825 
1826 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1827 @*/
1828 PetscErrorCode  MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1829 {
1830   PetscErrorCode ierr;
1831   PetscMPIInt    size;
1832 
1833   PetscFunctionBegin;
1834   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1835   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1836   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1837   if (size > 1) {
1838     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1839     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1840   } else {
1841     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1842     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1843   }
1844   PetscFunctionReturn(0);
1845 }
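/*
   A minimal usage sketch for MatCreateMPIDense(); the global sizes are
   assumptions for illustration:

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   Entries would normally be set with MatSetValues() before the assembly calls.
*/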
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatDuplicate_MPIDense"
1849 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1850 {
1851   Mat            mat;
1852   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1853   PetscErrorCode ierr;
1854 
1855   PetscFunctionBegin;
1856   *newmat       = 0;
1857   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1858   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1859   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1860   a                 = (Mat_MPIDense*)mat->data;
1861   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1862 
1863   mat->factortype   = A->factortype;
1864   mat->assembled    = PETSC_TRUE;
1865   mat->preallocated = PETSC_TRUE;
1866 
1867   a->size           = oldmat->size;
1868   a->rank           = oldmat->rank;
1869   mat->insertmode   = NOT_SET_VALUES;
1870   a->nvec           = oldmat->nvec;
1871   a->donotstash     = oldmat->donotstash;
1872 
1873   ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
1874   ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);
1875 
1876   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1877   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1878   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1879 
1880   *newmat = mat;
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1886 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1887 {
1888   PetscErrorCode ierr;
1889   PetscMPIInt    rank,size;
1890   PetscInt       *rowners,i,m,nz,j;
1891   PetscScalar    *array,*vals,*vals_ptr;
1892   MPI_Status     status;
1893 
1894   PetscFunctionBegin;
1895   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1896   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1897 
1898   /* determine ownership of all rows */
1899   if (newmat->rmap->n < 0) m = M/size + ((M % size) > rank);
1900   else                     m = newmat->rmap->n;
1901   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1902   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1903   rowners[0] = 0;
1904   for (i=2; i<=size; i++) {
1905     rowners[i] += rowners[i-1];
1906   }
1907 
1908   if (!sizesset) {
1909     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1910   }
1911   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1912   ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);
1913 
1914   if (!rank) {
1915     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1916 
1917     /* read in my part of the matrix numerical values  */
1918     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1919 
1920     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1921     vals_ptr = vals;
1922     for (i=0; i<m; i++) {
1923       for (j=0; j<N; j++) {
1924         array[i + j*m] = *vals_ptr++;
1925       }
1926     }
1927 
1928     /* read in the other processors' parts and ship them out */
1929     for (i=1; i<size; i++) {
1930       nz   = (rowners[i+1] - rowners[i])*N;
1931       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1932       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1933     }
1934   } else {
1935     /* receive numeric values */
1936     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1937 
1938     /* receive message of values */
1939     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);
1940 
1941     /* insert into the matrix by row (this is why we cannot read directly into the array) */
1942     vals_ptr = vals;
1943     for (i=0; i<m; i++) {
1944       for (j=0; j<N; j++) {
1945         array[i + j*m] = *vals_ptr++;
1946       }
1947     }
1948   }
1949   ierr = PetscFree(rowners);CHKERRQ(ierr);
1950   ierr = PetscFree(vals);CHKERRQ(ierr);
1951   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatLoad_MPIDense"
1958 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1959 {
1960   PetscScalar    *vals,*svals;
1961   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1962   MPI_Status     status;
1963   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1964   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1965   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1966   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1967   int            fd;
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1972   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1973   if (!rank) {
1974     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1975     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1976     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1977   }
1978   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1979 
1980   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1981   M = header[1]; N = header[2]; nz = header[3];
1982 
1983   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1984   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1985   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1986 
1987   /* If global sizes are set, check if they are consistent with that given in the file */
1988   if (sizesset) {
1989     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1990   }
1991   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
1992   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
1993 
1994   /*
1995        Handle case where matrix is stored on disk as a dense matrix
1996   */
1997   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1998     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
1999     PetscFunctionReturn(0);
2000   }
2001 
2002   /* determine ownership of all rows */
2003   if (newmat->rmap->n < 0) m = PetscMPIIntCast(M/size + ((M % size) > rank));
2004   else                     m = PetscMPIIntCast(newmat->rmap->n);
2005   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2006   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2007   rowners[0] = 0;
2008   for (i=2; i<=size; i++) {
2009     rowners[i] += rowners[i-1];
2010   }
2011   rstart = rowners[rank];
2012   rend   = rowners[rank+1];
2013 
2014   /* distribute row lengths to all processors */
2015   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
2016   if (!rank) {
2017     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2018     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2019     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2020     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2021     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2022     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2023   } else {
2024     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2025   }
2026 
2027   if (!rank) {
2028     /* calculate the number of nonzeros on each processor */
2029     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2030     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2031     for (i=0; i<size; i++) {
2032       for (j=rowners[i]; j< rowners[i+1]; j++) {
2033         procsnz[i] += rowlengths[j];
2034       }
2035     }
2036     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2037 
2038     /* determine max buffer needed and allocate it */
2039     maxnz = 0;
2040     for (i=0; i<size; i++) {
2041       maxnz = PetscMax(maxnz,procsnz[i]);
2042     }
2043     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2044 
2045     /* read in my part of the matrix column indices  */
2046     nz = procsnz[0];
2047     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2048     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2049 
2050     /* read in everyone else's parts and ship them off */
2051     for (i=1; i<size; i++) {
2052       nz   = procsnz[i];
2053       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2054       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2055     }
2056     ierr = PetscFree(cols);CHKERRQ(ierr);
2057   } else {
2058     /* determine buffer space needed for message */
2059     nz = 0;
2060     for (i=0; i<m; i++) {
2061       nz += ourlens[i];
2062     }
2063     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2064 
2065     /* receive message of column indices*/
2066     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2067     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2068     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2069   }
2070 
2071   /* loop over local rows, determining number of off diagonal entries */
2072   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2073   jj = 0;
2074   for (i=0; i<m; i++) {
2075     for (j=0; j<ourlens[i]; j++) {
2076       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2077       jj++;
2078     }
2079   }
2080 
2081   /* create our matrix */
2082   for (i=0; i<m; i++) {
2083     ourlens[i] -= offlens[i];
2084   }
2085 
2086   if (!sizesset) {
2087     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2088   }
2089   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2090   for (i=0; i<m; i++) {
2091     ourlens[i] += offlens[i];
2092   }
2093 
2094   if (!rank) {
2095     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2096 
2097     /* read in my part of the matrix numerical values  */
2098     nz = procsnz[0];
2099     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2100 
2101     /* insert into matrix */
2102     jj      = rstart;
2103     smycols = mycols;
2104     svals   = vals;
2105     for (i=0; i<m; i++) {
2106       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2107       smycols += ourlens[i];
2108       svals   += ourlens[i];
2109       jj++;
2110     }
2111 
2112     /* read in the other processors' parts and ship them out */
2113     for (i=1; i<size; i++) {
2114       nz   = procsnz[i];
2115       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2116       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2117     }
2118     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2119   } else {
2120     /* receive numeric values */
2121     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2122 
2123     /* receive message of values */
2124     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2125     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2126     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2127 
2128     /* insert into matrix */
2129     jj      = rstart;
2130     smycols = mycols;
2131     svals   = vals;
2132     for (i=0; i<m; i++) {
2133       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2134       smycols += ourlens[i];
2135       svals   += ourlens[i];
2136       jj++;
2137     }
2138   }
2139   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2140   ierr = PetscFree(vals);CHKERRQ(ierr);
2141   ierr = PetscFree(mycols);CHKERRQ(ierr);
2142   ierr = PetscFree(rowners);CHKERRQ(ierr);
2143 
2144   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2145   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
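/*
   A minimal sketch of driving MatLoad_MPIDense() through the public interface;
   the file name matrix.dat is an assumption for illustration:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/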
2148 
2149 #undef __FUNCT__
2150 #define __FUNCT__ "MatEqual_MPIDense"
2151 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2152 {
2153   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2154   Mat            a,b;
2155   PetscBool      flg;
2156   PetscErrorCode ierr;
2157 
2158   PetscFunctionBegin;
2159   a = matA->A;
2160   b = matB->A;
2161   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2162   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 #if defined(PETSC_HAVE_PLAPACK)
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2170 /*@C
2171   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2172   Level: developer
2173 
2174 .keywords: Petsc, destroy, package, PLAPACK
2175 .seealso: PetscFinalize()
2176 @*/
2177 PetscErrorCode  PetscPLAPACKFinalizePackage(void)
2178 {
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   ierr = PLA_Finalize();CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2188 /*@C
2189   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2190   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2191 
2192   Input Parameter:
2193 .   comm - the communicator the matrix lives on
2194 
2195   Level: developer
2196 
2197    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2198   same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2199   PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, these
2200   subcommunicators cannot overlap.
2201 
2202 .keywords: Petsc, initialize, package, PLAPACK
2203 .seealso: PetscSysInitializePackage(), PetscInitialize()
2204 @*/
2205 PetscErrorCode  PetscPLAPACKInitializePackage(MPI_Comm comm)
2206 {
2207   PetscMPIInt    size;
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   if (!PLA_Initialized(PETSC_NULL)) {
2212 
2213     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2214     Plapack_nprows = 1;
2215     Plapack_npcols = size;
2216 
2217     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2218       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2219       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2220 #if defined(PETSC_USE_DEBUG)
2221       Plapack_ierror = 3;
2222 #else
2223       Plapack_ierror = 0;
2224 #endif
2225       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2226       if (Plapack_ierror) {
2227         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2228       } else {
2229         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2230       }
2231 
2232       Plapack_nb_alg = 0;
2233       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2234       if (Plapack_nb_alg) {
2235         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2236       }
2237     PetscOptionsEnd();
2238 
2239     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2240     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2241     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2242   }
2243   PetscFunctionReturn(0);
2244 }
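/*
   A hypothetical run line illustrating the 2D process mesh options read above
   (the executable name is an assumption): on four processes,

     mpiexec -n 4 ./ex1 -mat_plapack_nprows 2 -mat_plapack_npcols 2

   requests a 2 x 2 mesh from PLA_Comm_1D_to_2D() instead of the default 1 x 4.
*/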
2245 
2246 #endif
2247