xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision d9262e5436a7c5656b4d5eb7a9491a5bb2e89b7f)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 EXTERN_C_BEGIN
13 #include "PLA.h"
14 EXTERN_C_END
15 
16 typedef struct {
17   PLA_Obj        A,pivots;
18   PLA_Template   templ;
19   MPI_Datatype   datatype;
20   PetscInt       nb,rstart;
21   VecScatter     ctx;
22   IS             is_pla,is_petsc;
23   PetscTruth     pla_solved;
24   MatStructure   mstruct;
25 } Mat_Plapack;
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatDenseGetLocalMatrix"
30 /*@
31       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
32               matrix that represents the operator. For a sequential dense matrix it returns the matrix itself.
33 
34     Not Collective
35 
36     Input Parameter:
37 .      A - the sequential or MPI dense matrix
38 
39     Output Parameter:
40 .      B - the inner matrix
41 
42     Level: intermediate
43 @*/
44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45 {
46   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
47   PetscErrorCode ierr;
48   PetscTruth     flg;
49 
50   PetscFunctionBegin;
51   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
52   if (flg) {
53     *B = mat->A;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
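
/*
   Usage sketch (hypothetical caller code, not part of this file): the inner matrix is
   the MATSEQDENSE block that holds this process's rows, so sequential dense operations
   can be applied to it directly.

     Mat A,Aloc;
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatScale(Aloc,2.0);CHKERRQ(ierr);     each process scales its own rows
*/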
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatGetRow_MPIDense"
62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63 {
64   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
65   PetscErrorCode ierr;
66   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
67 
68   PetscFunctionBegin;
69   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
70   lrow = row - rstart;
71   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatRestoreRow_MPIDense"
77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
83   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
84   PetscFunctionReturn(0);
85 }
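
/*
   Usage sketch (hypothetical caller code): rows may only be queried on the owning
   process, and every MatGetRow() must be paired with a MatRestoreRow() so that the
   index and value buffers allocated above are freed.

     PetscInt          row,rstart,rend,nz;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... use the nz entries of cols[] and vals[] ...
       ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/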
86 
87 EXTERN_C_BEGIN
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
91 {
92   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
93   PetscErrorCode ierr;
94   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
95   PetscScalar    *array;
96   MPI_Comm       comm;
97 
98   PetscFunctionBegin;
99   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
100 
101   /* The reuse aspect is not implemented efficiently */
102   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
103 
104   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
105   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
106   ierr = MatCreate(comm,B);CHKERRQ(ierr);
107   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
108   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
109   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
110   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
111   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
113 
114   *iscopy = PETSC_TRUE;
115   PetscFunctionReturn(0);
116 }
117 EXTERN_C_END
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatSetValues_MPIDense"
121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122 {
123   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
124   PetscErrorCode ierr;
125   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126   PetscTruth     roworiented = A->roworiented;
127 
128   PetscFunctionBegin;
129   if (v) PetscValidScalarPointer(v,6);
130   for (i=0; i<m; i++) {
131     if (idxm[i] < 0) continue;
132     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
133     if (idxm[i] >= rstart && idxm[i] < rend) {
134       row = idxm[i] - rstart;
135       if (roworiented) {
136         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
137       } else {
138         for (j=0; j<n; j++) {
139           if (idxn[j] < 0) continue;
140           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
141           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
142         }
143       }
144     } else {
145       if (!A->donotstash) {
146         if (roworiented) {
147           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
148         } else {
149           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
150         }
151       }
152     }
153   }
154   PetscFunctionReturn(0);
155 }
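
/*
   The layout of v[] above depends on MAT_ROW_ORIENTED: for an m-by-n block of input
   values, entry (i,j) is read from v[i*n+j] when row oriented (the default) and from
   v[i+j*m] when column oriented. A minimal sketch (hypothetical caller code) inserting
   a 2x2 block stored column major:

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar v[4]    = {1.0,3.0,2.0,4.0};     yields [1 2; 3 4]
     ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES);CHKERRQ(ierr);
*/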
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetValues_MPIDense"
159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160 {
161   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
164 
165   PetscFunctionBegin;
166   for (i=0; i<m; i++) {
167     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169     if (idxm[i] >= rstart && idxm[i] < rend) {
170       row = idxm[i] - rstart;
171       for (j=0; j<n; j++) {
172         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173         if (idxn[j] >= mat->cmap->N) {
174           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
175         }
176         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
177       }
178     } else {
179       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
180     }
181   }
182   PetscFunctionReturn(0);
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatGetArray_MPIDense"
187 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
188 {
189   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
197 #undef __FUNCT__
198 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
199 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
200 {
201   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
202   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
203   PetscErrorCode ierr;
204   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
205   const PetscInt *irow,*icol;
206   PetscScalar    *av,*bv,*v = lmat->v;
207   Mat            newmat;
208   IS             iscol_local;
209 
210   PetscFunctionBegin;
211   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
212   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
213   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
214   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
215   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
216   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
217 
218   /* No parallel redistribution is currently supported! Each index set should really
219      be checked to confirm that it is OK. Currently only a submatrix with the same
220      row partitioning as the original matrix is supported. */
221 
222   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
223   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
224 
225   /* Check submatrix call */
226   if (scall == MAT_REUSE_MATRIX) {
227     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
228     /* Really need to test the row and column sizes! */
229     newmat = *B;
230   } else {
231     /* Create and fill new matrix */
232     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
233     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
234     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
235     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
236   }
237 
238   /* Now extract the data pointers and do the copy, column at a time */
239   newmatd = (Mat_MPIDense*)newmat->data;
240   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
241 
242   for (i=0; i<Ncols; i++) {
243     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
244     for (j=0; j<nrows; j++) {
245       *bv++ = av[irow[j] - rstart];
246     }
247   }
248 
249   /* Assemble the matrices so that the correct flags are set */
250   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
252 
253   /* Free work space */
254   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
255   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
256   *B = newmat;
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatRestoreArray_MPIDense"
262 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
263 {
264   PetscFunctionBegin;
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
270 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
271 {
272   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
273   MPI_Comm       comm = ((PetscObject)mat)->comm;
274   PetscErrorCode ierr;
275   PetscInt       nstash,reallocs;
276   InsertMode     addv;
277 
278   PetscFunctionBegin;
279   /* make sure all processors are either in INSERTMODE or ADDMODE */
280   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
281   if (addv == (ADD_VALUES|INSERT_VALUES)) {
282     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
283   }
284   mat->insertmode = addv; /* in case this processor had no cache */
285 
286   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
287   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
288   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
294 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
295 {
296   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
297   PetscErrorCode  ierr;
298   PetscInt        i,*row,*col,flg,j,rstart,ncols;
299   PetscMPIInt     n;
300   PetscScalar     *val;
301   InsertMode      addv=mat->insertmode;
302 
303   PetscFunctionBegin;
304   /*  wait on receives */
305   while (1) {
306     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
307     if (!flg) break;
308 
309     for (i=0; i<n;) {
310       /* Now identify the consecutive vals belonging to the same row */
311       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
312       if (j < n) ncols = j-i;
313       else       ncols = n-i;
314       /* Now assemble all these values with a single function call */
315       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
316       i = j;
317     }
318   }
319   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
320 
321   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
322   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
323 
324   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
325     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
326   }
327   PetscFunctionReturn(0);
328 }
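
/*
   Entries destined for other processes are routed through the stash drained above. A
   minimal sketch of the standard workflow (hypothetical caller code): any process may
   set any global entry, and the Begin/End pair then migrates stashed values to their
   owners and assembles the local blocks.

     ierr = MatSetValues(A,1,&i,1,&j,&val,ADD_VALUES);CHKERRQ(ierr);   i,j may be off-process
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/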
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatZeroEntries_MPIDense"
332 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333 {
334   PetscErrorCode ierr;
335   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
336 
337   PetscFunctionBegin;
338   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
339   PetscFunctionReturn(0);
340 }
341 
342 /* The code does not handle the diagonal entries correctly unless the
343    matrix is square and the column and row ownerships are identical.
344    This is a BUG. The only way to fix it seems to be to access
345    the inner sequential matrix directly rather than going through
346    the MatZeroRows() routine.
347 */
348 #undef __FUNCT__
349 #define __FUNCT__ "MatZeroRows_MPIDense"
350 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
351 {
352   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
353   PetscErrorCode ierr;
354   PetscInt       i,*owners = A->rmap->range;
355   PetscInt       *nprocs,j,idx,nsends;
356   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
357   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
358   PetscInt       *lens,*lrows,*values;
359   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
360   MPI_Comm       comm = ((PetscObject)A)->comm;
361   MPI_Request    *send_waits,*recv_waits;
362   MPI_Status     recv_status,*send_status;
363   PetscTruth     found;
364 
365   PetscFunctionBegin;
366   /*  first count number of contributors to each processor */
367   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
368   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
369   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
370   for (i=0; i<N; i++) {
371     idx = rows[i];
372     found = PETSC_FALSE;
373     for (j=0; j<size; j++) {
374       if (idx >= owners[j] && idx < owners[j+1]) {
375         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
376       }
377     }
378     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
379   }
380   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
381 
382   /* inform other processors of number of messages and max length*/
383   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
384 
385   /* post receives:   */
386   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
387   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
388   for (i=0; i<nrecvs; i++) {
389     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
390   }
391 
392   /* do sends:
393       1) starts[i] gives the starting index in svalues for stuff going to
394          the ith processor
395   */
396   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
397   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
398   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
399   starts[0]  = 0;
400   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
401   for (i=0; i<N; i++) {
402     svalues[starts[owner[i]]++] = rows[i];
403   }
404 
405   starts[0] = 0;
406   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407   count = 0;
408   for (i=0; i<size; i++) {
409     if (nprocs[2*i+1]) {
410       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
411     }
412   }
413   ierr = PetscFree(starts);CHKERRQ(ierr);
414 
415   base = owners[rank];
416 
417   /*  wait on receives */
418   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
419   count  = nrecvs;
420   slen   = 0;
421   while (count) {
422     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
423     /* unpack receives into our local space */
424     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
425     source[imdex]  = recv_status.MPI_SOURCE;
426     lens[imdex]    = n;
427     slen += n;
428     count--;
429   }
430   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
431 
432   /* move the data into the send scatter */
433   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
434   count = 0;
435   for (i=0; i<nrecvs; i++) {
436     values = rvalues + i*nmax;
437     for (j=0; j<lens[i]; j++) {
438       lrows[count++] = values[j] - base;
439     }
440   }
441   ierr = PetscFree(rvalues);CHKERRQ(ierr);
442   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
443   ierr = PetscFree(owner);CHKERRQ(ierr);
444   ierr = PetscFree(nprocs);CHKERRQ(ierr);
445 
446   /* actually zap the local rows */
447   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
448   ierr = PetscFree(lrows);CHKERRQ(ierr);
449 
450   /* wait on sends */
451   if (nsends) {
452     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
453     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
454     ierr = PetscFree(send_status);CHKERRQ(ierr);
455   }
456   ierr = PetscFree(send_waits);CHKERRQ(ierr);
457   ierr = PetscFree(svalues);CHKERRQ(ierr);
458 
459   PetscFunctionReturn(0);
460 }
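
/*
   Usage sketch (hypothetical caller code): rows are given by global index and need not
   be owned by the calling process; the rendezvous above forwards each index to its
   owner before the local rows are zeroed.

     PetscInt rows[2] = {0,5};                        global row numbers
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);  zero the rows, put 1.0 on the diagonal
*/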
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatMult_MPIDense"
464 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
465 {
466   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
467   PetscErrorCode ierr;
468 
469   PetscFunctionBegin;
470   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
472   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatMultAdd_MPIDense"
478 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
479 {
480   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
481   PetscErrorCode ierr;
482 
483   PetscFunctionBegin;
484   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
485   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
486   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
487   PetscFunctionReturn(0);
488 }
489 
490 #undef __FUNCT__
491 #define __FUNCT__ "MatMultTranspose_MPIDense"
492 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
493 {
494   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
495   PetscErrorCode ierr;
496   PetscScalar    zero = 0.0;
497 
498   PetscFunctionBegin;
499   ierr = VecSet(yy,zero);CHKERRQ(ierr);
500   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
501   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
502   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
508 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
509 {
510   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
511   PetscErrorCode ierr;
512 
513   PetscFunctionBegin;
514   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
515   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
516   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
517   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
518   PetscFunctionReturn(0);
519 }
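
/*
   The four multiply routines above share one communication pattern: y = A*x scatters
   the distributed x into the local work vector lvec (whose length is the global number
   of columns) and then applies the local block, while y = A'*x applies the local
   transpose first and accumulates the partial sums with a reverse ADD_VALUES scatter.
   A minimal sketch with conforming vectors (hypothetical caller code; comm is the
   matrix communicator):

     PetscInt m,n,M,N;
     Vec      x,y;
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,n,N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,m,M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/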
520 
521 #undef __FUNCT__
522 #define __FUNCT__ "MatGetDiagonal_MPIDense"
523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
524 {
525   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
526   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
527   PetscErrorCode ierr;
528   PetscInt       len,i,n,m = A->rmap->n,radd;
529   PetscScalar    *x,zero = 0.0;
530 
531   PetscFunctionBegin;
532   ierr = VecSet(v,zero);CHKERRQ(ierr);
533   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
534   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
535   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
536   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
537   radd = A->rmap->rstart*m;
538   for (i=0; i<len; i++) {
539     x[i] = aloc->v[radd + i*m + i];
540   }
541   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
542   PetscFunctionReturn(0);
543 }
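
/*
   Indexing note for the loop above: the local block stores all N columns of the owned
   rows column major with leading dimension m (as assumed here), so global entry
   (rstart+i,rstart+i) lives at aloc->v[i + (rstart+i)*m] = aloc->v[radd + i*m + i].
   For example, with m = 2 local rows and rstart = 4, the first local diagonal entry
   (4,4) sits at offset 4*2 + 0*2 + 0 = 8.
*/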
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatDestroy_MPIDense"
547 PetscErrorCode MatDestroy_MPIDense(Mat mat)
548 {
549   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
550   PetscErrorCode ierr;
551 #if defined(PETSC_HAVE_PLAPACK)
552   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
553 #endif
554 
555   PetscFunctionBegin;
556 
557 #if defined(PETSC_USE_LOG)
558   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
559 #endif
560   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
561   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
562   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
563   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
564 #if defined(PETSC_HAVE_PLAPACK)
565   if (lu) {
566     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
567     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
568     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
569 
570     if (lu->is_pla) {
571       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
572       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
573       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
574     }
575   }
576 #endif
577 
578   ierr = PetscFree(mdn);CHKERRQ(ierr);
579   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
580   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
582   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
584   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_MPIDense_Binary"
590 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
591 {
592   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
593   PetscErrorCode    ierr;
594   PetscViewerFormat format;
595   int               fd;
596   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
597   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
598   PetscScalar       *work,*v,*vv;
599   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
600   MPI_Status        status;
601 
602   PetscFunctionBegin;
603   if (mdn->size == 1) {
604     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
605   } else {
606     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
607     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
608     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
609 
610     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
611     if (format == PETSC_VIEWER_NATIVE) {
612 
613       if (!rank) {
614         /* store the matrix as a dense matrix */
615         header[0] = MAT_FILE_CLASSID;
616         header[1] = mat->rmap->N;
617         header[2] = N;
618         header[3] = MATRIX_BINARY_FORMAT_DENSE;
619         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
620 
621         /* get largest work array needed for transposing array */
622         mmax = mat->rmap->n;
623         for (i=1; i<size; i++) {
624           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
625         }
626         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
627 
628         /* write out local array, by rows */
629         m    = mat->rmap->n;
630         v    = a->v;
631         for (j=0; j<N; j++) {
632           for (i=0; i<m; i++) {
633             work[j + i*N] = *v++;
634           }
635         }
636         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
637         /* get largest work array to receive messages from other processes, excludes process zero */
638         mmax = 0;
639         for (i=1; i<size; i++) {
640           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
641         }
642         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
643         for (k=1; k<size; k++) {
644           v    = vv;
645           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
646           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
647 
648           for (j=0; j<N; j++) {
649             for (i=0; i<m; i++) {
650               work[j + i*N] = *v++;
651             }
652           }
653           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
654         }
655         ierr = PetscFree(work);CHKERRQ(ierr);
656         ierr = PetscFree(vv);CHKERRQ(ierr);
657       } else {
658         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
659       }
660     } else {
661       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE");
662     }
663   }
664   PetscFunctionReturn(0);
665 }
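
/*
   Usage sketch (hypothetical caller code; comm stands for the matrix communicator):
   the binary path above requires the native format and writes a MAT_FILE_CLASSID
   header followed by the matrix entries row by row.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(comm,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/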
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
669 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
670 {
671   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
672   PetscErrorCode        ierr;
673   PetscMPIInt           size = mdn->size,rank = mdn->rank;
675   PetscTruth            iascii,isdraw;
676   PetscViewer           sviewer;
677   PetscViewerFormat     format;
678 #if defined(PETSC_HAVE_PLAPACK)
679   Mat_Plapack           *lu;
680 #endif
681 
682   PetscFunctionBegin;
683   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
684   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
685   if (iascii) {
687     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
688     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
689       MatInfo info;
690       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
692                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
693       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
694 #if defined(PETSC_HAVE_PLAPACK)
695       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
696       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
697       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
698       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
699       if (mat->factortype){
700         lu=(Mat_Plapack*)(mat->spptr);
701         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
702       }
703 #else
704       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
705 #endif
706       PetscFunctionReturn(0);
707     } else if (format == PETSC_VIEWER_ASCII_INFO) {
708       PetscFunctionReturn(0);
709     }
710   } else if (isdraw) {
711     PetscDraw  draw;
712     PetscTruth isnull;
713 
714     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
715     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
716     if (isnull) PetscFunctionReturn(0);
717   }
718 
719   if (size == 1) {
720     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
721   } else {
722     /* assemble the entire matrix onto first processor. */
723     Mat         A;
724     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
725     PetscInt    *cols;
726     PetscScalar *vals;
727 
728     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
729     if (!rank) {
730       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
731     } else {
732       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
733     }
734     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
735     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
736     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
737     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
738 
739     /* Copy the matrix ... This isn't the most efficient means,
740        but it's quick for now */
741     A->insertmode = INSERT_VALUES;
742     row = mat->rmap->rstart; m = mdn->A->rmap->n;
743     for (i=0; i<m; i++) {
744       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
745       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
746       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
747       row++;
748     }
749 
750     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
751     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
752     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
753     if (!rank) {
754       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
755     }
756     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
757     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
758     ierr = MatDestroy(A);CHKERRQ(ierr);
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 #undef __FUNCT__
764 #define __FUNCT__ "MatView_MPIDense"
765 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
766 {
767   PetscErrorCode ierr;
768   PetscTruth     iascii,isbinary,isdraw,issocket;
769 
770   PetscFunctionBegin;
771 
772   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
773   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
774   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
775   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
776 
777   if (iascii || issocket || isdraw) {
778     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
779   } else if (isbinary) {
780     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
781   } else {
782     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
783   }
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatGetInfo_MPIDense"
789 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
790 {
791   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
792   Mat            mdn = mat->A;
793   PetscErrorCode ierr;
794   PetscReal      isend[5],irecv[5];
795 
796   PetscFunctionBegin;
797   info->block_size     = 1.0;
798   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
799   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800   isend[3] = info->memory;  isend[4] = info->mallocs;
801   if (flag == MAT_LOCAL) {
802     info->nz_used      = isend[0];
803     info->nz_allocated = isend[1];
804     info->nz_unneeded  = isend[2];
805     info->memory       = isend[3];
806     info->mallocs      = isend[4];
807   } else if (flag == MAT_GLOBAL_MAX) {
808     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
809     info->nz_used      = irecv[0];
810     info->nz_allocated = irecv[1];
811     info->nz_unneeded  = irecv[2];
812     info->memory       = irecv[3];
813     info->mallocs      = irecv[4];
814   } else if (flag == MAT_GLOBAL_SUM) {
815     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
816     info->nz_used      = irecv[0];
817     info->nz_allocated = irecv[1];
818     info->nz_unneeded  = irecv[2];
819     info->memory       = irecv[3];
820     info->mallocs      = irecv[4];
821   }
822   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
823   info->fill_ratio_needed = 0;
824   info->factor_mallocs    = 0;
825   PetscFunctionReturn(0);
826 }
827 
828 #undef __FUNCT__
829 #define __FUNCT__ "MatSetOption_MPIDense"
830 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
831 {
832   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
833   PetscErrorCode ierr;
834 
835   PetscFunctionBegin;
836   switch (op) {
837   case MAT_NEW_NONZERO_LOCATIONS:
838   case MAT_NEW_NONZERO_LOCATION_ERR:
839   case MAT_NEW_NONZERO_ALLOCATION_ERR:
840     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
841     break;
842   case MAT_ROW_ORIENTED:
843     a->roworiented = flg;
844     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
845     break;
846   case MAT_NEW_DIAGONALS:
847   case MAT_USE_HASH_TABLE:
848     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
849     break;
850   case MAT_IGNORE_OFF_PROC_ENTRIES:
851     a->donotstash = flg;
852     break;
853   case MAT_SYMMETRIC:
854   case MAT_STRUCTURALLY_SYMMETRIC:
855   case MAT_HERMITIAN:
856   case MAT_SYMMETRY_ETERNAL:
857   case MAT_IGNORE_LOWER_TRIANGULAR:
858     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
859     break;
860   default:
861     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
862   }
863   PetscFunctionReturn(0);
864 }
865 
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatDiagonalScale_MPIDense"
869 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
870 {
871   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
872   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
873   PetscScalar    *l,*r,x,*v;
874   PetscErrorCode ierr;
875   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
876 
877   PetscFunctionBegin;
878   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
879   if (ll) {
880     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
881     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
882     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
883     for (i=0; i<m; i++) {
884       x = l[i];
885       v = mat->v + i;
886       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
887     }
888     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
889     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
890   }
891   if (rr) {
892     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
893     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
894     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
897     for (i=0; i<n; i++) {
898       x = r[i];
899       v = mat->v + i*m;
900       for (j=0; j<m; j++) { (*v++) *= x;}
901     }
902     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
903     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
904   }
905   PetscFunctionReturn(0);
906 }
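
/*
   Usage sketch (hypothetical caller code): ll must conform to the rows and is applied
   in place, while rr must conform to the columns and is first scattered into lvec,
   since every process needs all N right scaling factors for its local block.

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);   computes A = diag(ll)*A*diag(rr)
*/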
907 
908 #undef __FUNCT__
909 #define __FUNCT__ "MatNorm_MPIDense"
910 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
911 {
912   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
913   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
914   PetscErrorCode ierr;
915   PetscInt       i,j;
916   PetscReal      sum = 0.0;
917   PetscScalar    *v = mat->v;
918 
919   PetscFunctionBegin;
920   if (mdn->size == 1) {
921     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
922   } else {
923     if (type == NORM_FROBENIUS) {
924       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
925 #if defined(PETSC_USE_COMPLEX)
926         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
927 #else
928         sum += (*v)*(*v); v++;
929 #endif
930       }
931       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
932       *nrm = sqrt(*nrm);
933       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
934     } else if (type == NORM_1) {
935       PetscReal *tmp,*tmp2;
936       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
937       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
938       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
939       *nrm = 0.0;
940       v = mat->v;
941       for (j=0; j<mdn->A->cmap->n; j++) {
942         for (i=0; i<mdn->A->rmap->n; i++) {
943           tmp[j] += PetscAbsScalar(*v);  v++;
944         }
945       }
946       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
947       for (j=0; j<A->cmap->N; j++) {
948         if (tmp2[j] > *nrm) *nrm = tmp2[j];
949       }
950       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
951       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
952     } else if (type == NORM_INFINITY) { /* max row norm */
953       PetscReal ntemp;
954       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
955       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
956     } else {
957       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
958     }
959   }
960   PetscFunctionReturn(0);
961 }
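
/*
   Reduction pattern used above: the Frobenius norm is assembled from the local sums of
   squares, ||A||_F = sqrt(sum over processes of sum |a_ij|^2), with a single MPI_SUM
   reduction; the 1-norm sums the per-column absolute values, reduces them with MPI_SUM,
   and then takes the maximum column sum; the infinity norm takes an MPI_MAX over the
   local row-sum maxima, which is exact because rows are never split across processes.
*/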
962 
963 #undef __FUNCT__
964 #define __FUNCT__ "MatTranspose_MPIDense"
965 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
966 {
967   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
968   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
969   Mat            B;
970   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
971   PetscErrorCode ierr;
972   PetscInt       j,i;
973   PetscScalar    *v;
974 
975   PetscFunctionBegin;
976   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
977   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
978     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
979     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
980     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
981     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
982   } else {
983     B = *matout;
984   }
985 
986   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
987   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
988   for (i=0; i<m; i++) rwork[i] = rstart + i;
989   for (j=0; j<n; j++) {
990     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
991     v   += m;
992   }
993   ierr = PetscFree(rwork);CHKERRQ(ierr);
994   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
995   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
996   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
997     *matout = B;
998   } else {
999     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 
1005 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1006 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1010 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1011 {
1012   PetscErrorCode ierr;
1013 
1014   PetscFunctionBegin;
1015   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #if defined(PETSC_HAVE_PLAPACK)
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1023 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1024 {
1025   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1026   PetscErrorCode ierr;
1027   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1028   PetscScalar    *array;
1029   PetscReal      one = 1.0;
1030 
1031   PetscFunctionBegin;
1032   /* Copy A into lu->A (the PLAPACK matrix stored in F) */
1033   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1034   ierr = PLA_API_begin();CHKERRQ(ierr);
1035   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1036   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1037   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1038   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1039   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1040   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1041   ierr = PLA_API_end();CHKERRQ(ierr);
1042   lu->rstart = rstart;
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1048 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1049 {
1050   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1051   PetscErrorCode ierr;
1052   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1053   PetscScalar    *array;
1054   PetscReal      one = 1.0;
1055 
1056   PetscFunctionBegin;
1057   /* Copy the PLAPACK matrix lu->A (stored in F) back into A */
1058   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1059   ierr = PLA_API_begin();CHKERRQ(ierr);
1060   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1061   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1062   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1063   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1064   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1065   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1066   ierr = PLA_API_end();CHKERRQ(ierr);
1067   lu->rstart = rstart;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1073 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1074 {
1075   PetscErrorCode ierr;
1076   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1077   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1078   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1079   PLA_Obj        alpha = NULL,beta = NULL;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1083   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1084 
1085   /*
1086   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1087   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1088   */
1089 
1090   /* do the multiply in PLA  */
1091   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1092   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1093   CHKMEMQ;
1094 
1095   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1096   CHKMEMQ;
1097   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1098   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1099 
1100   /*
1101   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1102   */
1103   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1109 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1110 {
1111   PetscErrorCode ierr;
1112   PetscInt       m=A->rmap->n,n=B->cmap->n;
1113   Mat            Cmat;
1114 
1115   PetscFunctionBegin;
1116   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
1117   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1118   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1119   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1120   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1121   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1122   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1123 
1124   *C = Cmat;
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1130 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1131 {
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   if (scall == MAT_INITIAL_MATRIX){
1136     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1137   }
1138   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatSolve_MPIDense"
1144 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1145 {
1146   MPI_Comm       comm = ((PetscObject)A)->comm;
1147   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1148   PetscErrorCode ierr;
1149   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1150   PetscScalar    *array;
1151   PetscReal      one = 1.0;
1152   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1153   PLA_Obj        v_pla = NULL;
1154   PetscScalar    *loc_buf;
1155   Vec            loc_x;
1156 
1157   PetscFunctionBegin;
1158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1159   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1160 
1161   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1162   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1163   PLA_Obj_set_to_zero(v_pla);
1164 
1165   /* Copy b into rhs_pla */
1166   PLA_API_begin();
1167   PLA_Obj_API_open(v_pla);
1168   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1169   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1170   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1171   PLA_Obj_API_close(v_pla);
1172   PLA_API_end();
1173 
1174   if (A->factortype == MAT_FACTOR_LU){
1175     /* Apply the permutations to the right hand sides */
1176     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1177 
1178     /* Solve L y = b, overwriting b with y */
1179     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1180 
1181     /* Solve U x = y (=b), overwriting b with x */
1182     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1183   } else { /* MAT_FACTOR_CHOLESKY */
1184     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1185     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1186                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1187   }
1188 
1189   /* Copy PLAPACK x into Petsc vector x  */
1190   PLA_Obj_local_length(v_pla, &loc_m);
1191   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1192   PLA_Obj_local_stride(v_pla, &loc_stride);
1193   /*
1194     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1195   */
1196   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1197   if (!lu->pla_solved){
1198 
1199     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1200     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1201 
1202     /* Create the index sets and scatter context for the VecScatter */
1203     PLA_Obj_local_length(v_pla, &loc_m);
1204     PLA_Obj_local_stride(v_pla, &loc_stride);
1205     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1206 
1207     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1208     for (i=0; i<loc_m; i+=lu->nb){
1209       j = 0;
1210       while (j < lu->nb && i+j < loc_m){
1211         idx_petsc[i+j] = rstart + j; j++;
1212       }
1213       rstart += size*lu->nb;
1214     }
1215 
1216     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1217 
1218     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1219     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1220     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1221     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1222   }
1223   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225 
1226   /* Free data */
1227   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1228   PLA_Obj_free(&v_pla);
1229 
1230   lu->pla_solved = PETSC_TRUE;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1236 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1237 {
1238   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1239   PetscErrorCode ierr;
1240   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1241   PetscInt       info_pla=0;
1242   PetscScalar    *array,one = 1.0;
1243 
1244   PetscFunctionBegin;
1245   if (lu->mstruct == SAME_NONZERO_PATTERN){
1246     PLA_Obj_free(&lu->A);
1247     PLA_Obj_free (&lu->pivots);
1248   }
1249   /* Create PLAPACK matrix object */
1250   lu->A = NULL; lu->pivots = NULL;
1251   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1252   PLA_Obj_set_to_zero(lu->A);
1253   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1254 
1255   /* Copy A into lu->A */
1256   PLA_API_begin();
1257   PLA_Obj_API_open(lu->A);
1258   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1259   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1260   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1261   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1262   PLA_Obj_API_close(lu->A);
1263   PLA_API_end();
1264 
1265   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1266   info_pla = PLA_LU(lu->A,lu->pivots);
1267   if (info_pla != 0)
1268     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1269 
1270   lu->rstart         = rstart;
1271   lu->mstruct        = SAME_NONZERO_PATTERN;
1272   F->ops->solve      = MatSolve_MPIDense;
1273   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1279 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1280 {
1281   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1282   PetscErrorCode ierr;
1283   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1284   PetscInt       info_pla=0;
1285   PetscScalar    *array,one = 1.0;
1286 
1287   PetscFunctionBegin;
1288   if (lu->mstruct == SAME_NONZERO_PATTERN){
1289     PLA_Obj_free(&lu->A);
1290   }
1291   /* Create PLAPACK matrix object */
1292   lu->A      = NULL;
1293   lu->pivots = NULL;
1294   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1295 
1296   /* Copy A into lu->A */
1297   PLA_API_begin();
1298   PLA_Obj_API_open(lu->A);
1299   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1300   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1301   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1302   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1303   PLA_Obj_API_close(lu->A);
1304   PLA_API_end();
1305 
1306   /* Factor P A -> Chol */
1307   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1308   if (info_pla != 0)
1309     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1310 
1311   lu->rstart         = rstart;
1312   lu->mstruct        = SAME_NONZERO_PATTERN;
1313   F->ops->solve      = MatSolve_MPIDense;
1314   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 /* Note: the PETSc permutation perm is ignored */
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1321 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1322 {
1323   PetscErrorCode ierr;
1324   PetscTruth     issymmetric,set;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1328   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1329   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 /* Note: the PETSc row and column permutations r and c are ignored */
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1336 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1337 {
1338   PetscErrorCode ierr;
1339   PetscInt       M = A->rmap->N;
1340   Mat_Plapack    *lu;
1341 
1342   PetscFunctionBegin;
1343   lu = (Mat_Plapack*)F->spptr;
1344   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1345   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 EXTERN_C_BEGIN
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1352 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1353 {
1354   PetscFunctionBegin;
1355   *type = MAT_SOLVER_PLAPACK;
1356   PetscFunctionReturn(0);
1357 }
1358 EXTERN_C_END
1359 
1360 EXTERN_C_BEGIN
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1363 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1364 {
1365   PetscErrorCode ierr;
1366   Mat_Plapack    *lu;
1367   PetscMPIInt    size;
1368   PetscInt       M=A->rmap->N;
1369 
1370   PetscFunctionBegin;
1371   /* Create the factorization matrix */
1372   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1373   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1374   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1375   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1376   (*F)->spptr = (void*)lu;
1377 
1378   /* Set default Plapack parameters */
1379   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1380   lu->nb = M/size;
1381   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1382 
1383   /* Set runtime options */
1384   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1385     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1386   PetscOptionsEnd();
1387 
1388   /* Create object distribution template */
1389   lu->templ = NULL;
1390   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1391 
1392   /* Set the datatype */
1393 #if defined(PETSC_USE_COMPLEX)
1394   lu->datatype = MPI_DOUBLE_COMPLEX;
1395 #else
1396   lu->datatype = MPI_DOUBLE;
1397 #endif
1398 
1399   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1400 
1401 
1402   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1403   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1404 
1405   if (ftype == MAT_FACTOR_LU) {
1406     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1407   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1408     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1409   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1410   (*F)->factortype = ftype;
1411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 EXTERN_C_END
1415 #endif
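
/*
   Usage sketch for the PLAPACK interface above (hypothetical caller code, assuming a
   PETSc build configured with PLAPACK; r and c are ordering index sets, which this
   solver ignores as noted above):

     Mat           F;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,r,c,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(F);CHKERRQ(ierr);
*/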
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1419 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1420 {
1421 #if defined(PETSC_HAVE_PLAPACK)
1422   PetscErrorCode ierr;
1423 #endif
1424 
1425   PetscFunctionBegin;
1426 #if defined(PETSC_HAVE_PLAPACK)
1427   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1428 #else
1429   SETERRQ1(PETSC_ERR_SUP,"Matrix type %s requires the PLAPACK direct solver; install PLAPACK",((PetscObject)A)->type_name);
1430 #endif
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatAXPY_MPIDense"
1436 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1437 {
1438   PetscErrorCode ierr;
1439   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatConjugate_MPIDense"
1448 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat)
1449 {
1450   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatRealPart_MPIDense"
1460 PetscErrorCode MatRealPart_MPIDense(Mat A)
1461 {
1462   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1472 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1473 {
1474   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /* -------------------------------------------------------------------*/
1483 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1484        MatGetRow_MPIDense,
1485        MatRestoreRow_MPIDense,
1486        MatMult_MPIDense,
1487 /* 4*/ MatMultAdd_MPIDense,
1488        MatMultTranspose_MPIDense,
1489        MatMultTransposeAdd_MPIDense,
1490        0,
1491        0,
1492        0,
1493 /*10*/ 0,
1494        0,
1495        0,
1496        0,
1497        MatTranspose_MPIDense,
1498 /*15*/ MatGetInfo_MPIDense,
1499        MatEqual_MPIDense,
1500        MatGetDiagonal_MPIDense,
1501        MatDiagonalScale_MPIDense,
1502        MatNorm_MPIDense,
1503 /*20*/ MatAssemblyBegin_MPIDense,
1504        MatAssemblyEnd_MPIDense,
1505        MatSetOption_MPIDense,
1506        MatZeroEntries_MPIDense,
1507 /*24*/ MatZeroRows_MPIDense,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*29*/ MatSetUpPreallocation_MPIDense,
1513        0,
1514        0,
1515        MatGetArray_MPIDense,
1516        MatRestoreArray_MPIDense,
1517 /*34*/ MatDuplicate_MPIDense,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*39*/ MatAXPY_MPIDense,
1523        MatGetSubMatrices_MPIDense,
1524        0,
1525        MatGetValues_MPIDense,
1526        0,
1527 /*44*/ 0,
1528        MatScale_MPIDense,
1529        0,
1530        0,
1531        0,
1532 /*49*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*54*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*59*/ MatGetSubMatrix_MPIDense,
1543        MatDestroy_MPIDense,
1544        MatView_MPIDense,
1545        0,
1546        0,
1547 /*64*/ 0,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*69*/ 0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*74*/ 0,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*79*/ 0,
1563        0,
1564        0,
1565        0,
1566 /*83*/ MatLoad_MPIDense,
1567        0,
1568        0,
1569        0,
1570        0,
1571        0,
1572 /*89*/
1573 #if defined(PETSC_HAVE_PLAPACK)
1574        MatMatMult_MPIDense_MPIDense,
1575        MatMatMultSymbolic_MPIDense_MPIDense,
1576        MatMatMultNumeric_MPIDense_MPIDense,
1577 #else
1578        0,
1579        0,
1580        0,
1581 #endif
1582        0,
1583        0,
1584 /*94*/ 0,
1585        0,
1586        0,
1587        0,
1588        0,
1589 /*99*/ 0,
1590        0,
1591        0,
1592        MatConjugate_MPIDense,
1593        0,
1594 /*104*/0,
1595        MatRealPart_MPIDense,
1596        MatImaginaryPart_MPIDense,
1597        0
1598 };
1599 
1600 EXTERN_C_BEGIN
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1603 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1604 {
1605   Mat_MPIDense   *a;
1606   PetscErrorCode ierr;
1607 
1608   PetscFunctionBegin;
1609   mat->preallocated = PETSC_TRUE;
1610   /* Note:  For now, when data is specified, this assumes the user has correctly
1611    allocated the local dense storage space.  We should add error checking. */
1612 
1613   a    = (Mat_MPIDense*)mat->data;
1614   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1615   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1616   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1617   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1618   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 EXTERN_C_END
1622 
1623 /*MC
1624    MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1625 
1626   Run config/configure.py with the option --download-plapack to build PETSc with PLAPACK support.
1627 
1628 
1629   Options Database Keys:
1630 + -mat_plapack_nprows <n> - number of rows in processor partition
1631 . -mat_plapack_npcols <n> - number of columns in processor partition
1632 . -mat_plapack_nb <n> - block size of template vector
1633 . -mat_plapack_nb_alg <n> - algorithmic block size
1634 - -mat_plapack_ckerror <n> - error checking flag
1635 
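       Example usage, a sketch (the executable name ./myprog and the option values are
       illustrative; nprows*npcols must equal the number of processes):
     .vb
        mpiexec -n 4 ./myprog -mat_type mpidense -pc_factor_mat_solver_package plapack \
                -mat_plapack_nprows 2 -mat_plapack_npcols 2
     .ve
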
1636    Level: intermediate
1637 
1638 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1639 
1640 M*/
1641 
1642 EXTERN_C_BEGIN
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatCreate_MPIDense"
1645 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1646 {
1647   Mat_MPIDense   *a;
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1652   mat->data         = (void*)a;
1653   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1654   mat->mapping      = 0;
1655 
1656   mat->insertmode = NOT_SET_VALUES;
1657   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1658   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1659 
1660   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1661   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1662   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1663   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1664   a->nvec = mat->cmap->n;
1665 
1666   /* build cache for off array entries formed */
1667   a->donotstash = PETSC_FALSE;
1668   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1669 
1670   /* stuff used for matrix vector multiply */
1671   a->lvec        = 0;
1672   a->Mvctx       = 0;
1673   a->roworiented = PETSC_TRUE;
1674 
1675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1676                                      "MatGetDiagonalBlock_MPIDense",
1677                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1679                                      "MatMPIDenseSetPreallocation_MPIDense",
1680                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1682                                      "MatMatMult_MPIAIJ_MPIDense",
1683                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1684   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1685                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1686                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1687   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1688                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1689                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1690   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1691                                      "MatGetFactor_mpidense_petsc",
1692                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1693 #if defined(PETSC_HAVE_PLAPACK)
1694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1695                                      "MatGetFactor_mpidense_plapack",
1696                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1697   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1698 #endif
1699   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1700 
1701   PetscFunctionReturn(0);
1702 }
1703 EXTERN_C_END
1704 
1705 /*MC
1706    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1707 
1708    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1709    and MATMPIDENSE otherwise.
1710 
1711    Options Database Keys:
1712 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1713 
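       Example, a minimal sketch (comm, M, and N are assumed to be defined by the caller;
       error checking omitted):
     .vb
        Mat A;
        MatCreate(comm,&A);
        MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
        MatSetType(A,MATDENSE);  /* becomes MATSEQDENSE on one process, MATMPIDENSE otherwise */
     .ve
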
1714   Level: beginner
1715 
1716 
1717 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1718 M*/
1719 
1720 EXTERN_C_BEGIN
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatCreate_Dense"
1723 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1724 {
1725   PetscErrorCode ierr;
1726   PetscMPIInt    size;
1727 
1728   PetscFunctionBegin;
1729   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1730   if (size == 1) {
1731     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1732   } else {
1733     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 EXTERN_C_END
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1741 /*@C
1742    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1743 
1744    Not collective
1745 
1746    Input Parameters:
1747 +  A - the matrix
1748 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1749    to control all matrix memory allocation.
1750 
1751    Notes:
1752    The dense format is fully compatible with standard Fortran 77
1753    storage by columns.
1754 
1755    The data input variable is intended primarily for Fortran programmers
1756    who wish to allocate their own matrix memory space.  Most users should
1757    set data=PETSC_NULL.
1758 
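       Example, a minimal sketch (comm and the local/global sizes m, n, M, and N are
       assumed to be defined by the caller):
     .vb
        Mat A;
        MatCreate(comm,&A);
        MatSetSizes(A,m,n,M,N);
        MatSetType(A,MATMPIDENSE);
        MatMPIDenseSetPreallocation(A,PETSC_NULL);  /* PETSc allocates the local storage */
     .ve
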
1759    Level: intermediate
1760 
1761 .keywords: matrix,dense, parallel
1762 
1763 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1764 @*/
1765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1766 {
1767   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1768 
1769   PetscFunctionBegin;
1770   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1771   if (f) {
1772     ierr = (*f)(mat,data);CHKERRQ(ierr);
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatCreateMPIDense"
1779 /*@C
1780    MatCreateMPIDense - Creates a parallel matrix in dense format.
1781 
1782    Collective on MPI_Comm
1783 
1784    Input Parameters:
1785 +  comm - MPI communicator
1786 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1787 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1788 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1789 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1790 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1791    to control all matrix memory allocation.
1792 
1793    Output Parameter:
1794 .  A - the matrix
1795 
1796    Notes:
1797    The dense format is fully compatible with standard Fortran 77
1798    storage by columns.
1799 
1800    The data input variable is intended primarily for Fortran programmers
1801    who wish to allocate their own matrix memory space.  Most users should
1802    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1803 
1804    The user MUST specify either the local or global matrix dimensions
1805    (possibly both).
1806 
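       Example, a minimal sketch (the global size 100 is illustrative):
     .vb
        Mat A;
        MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);
     .ve
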
1807    Level: intermediate
1808 
1809 .keywords: matrix,dense, parallel
1810 
1811 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1812 @*/
1813 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1814 {
1815   PetscErrorCode ierr;
1816   PetscMPIInt    size;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1820   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1822   if (size > 1) {
1823     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1824     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1825   } else {
1826     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1827     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1828   }
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatDuplicate_MPIDense"
1834 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1835 {
1836   Mat            mat;
1837   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   *newmat       = 0;
1842   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1843   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1844   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1845   a                 = (Mat_MPIDense*)mat->data;
1846   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1847   mat->factortype   = A->factortype;
1848   mat->assembled    = PETSC_TRUE;
1849   mat->preallocated = PETSC_TRUE;
1850 
1851   mat->rmap->rstart     = A->rmap->rstart;
1852   mat->rmap->rend       = A->rmap->rend;
1853   a->size              = oldmat->size;
1854   a->rank              = oldmat->rank;
1855   mat->insertmode      = NOT_SET_VALUES;
1856   a->nvec              = oldmat->nvec;
1857   a->donotstash        = oldmat->donotstash;
1858 
1859   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1860   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1861 
1862   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1863   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1864   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1865 
1866   *newmat = mat;
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1872 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1873 {
1874   PetscErrorCode ierr;
1875   PetscMPIInt    rank,size;
1876   PetscInt       *rowners,i,m,nz,j;
1877   PetscScalar    *array,*vals,*vals_ptr;
1878   MPI_Status     status;
1879 
1880   PetscFunctionBegin;
1881   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1883 
1884   /* determine ownership of all rows */
1885   m          = M/size + ((M % size) > rank);
1886   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1887   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1888   rowners[0] = 0;
1889   for (i=2; i<=size; i++) {
1890     rowners[i] += rowners[i-1];
1891   }
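       /* worked example (illustrative): M = 10 rows on size = 4 processes gives local
          sizes m = 3, 3, 2, 2 and, after the prefix sum, rowners = {0, 3, 6, 8, 10} */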
1892 
1893   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1894   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1895   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1896   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1897   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1898 
1899   if (!rank) {
1900     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1901 
1902     /* read in my part of the matrix numerical values  */
1903     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1904 
1905     /* insert into the matrix row by row; the local array is stored column-major (Fortran order), which is why we cannot read the row-major file data directly into it */
1906     vals_ptr = vals;
1907     for (i=0; i<m; i++) {
1908       for (j=0; j<N; j++) {
1909         array[i + j*m] = *vals_ptr++;
1910       }
1911     }
1912 
1913     /* read in other processors and ship out */
1914     for (i=1; i<size; i++) {
1915       nz   = (rowners[i+1] - rowners[i])*N;
1916       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1917       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1918     }
1919   } else {
1920     /* receive numeric values */
1921     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1922 
1923     /* receive the message of values */
1924     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1925 
1926     /* insert into the matrix row by row; the local array is stored column-major (Fortran order), which is why we cannot read the row-major data directly into it */
1927     vals_ptr = vals;
1928     for (i=0; i<m; i++) {
1929       for (j=0; j<N; j++) {
1930         array[i + j*m] = *vals_ptr++;
1931       }
1932     }
1933   }
1934   ierr = PetscFree(rowners);CHKERRQ(ierr);
1935   ierr = PetscFree(vals);CHKERRQ(ierr);
1936   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1937   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatLoad_MPIDense"
1943 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1944 {
1945   Mat            A;
1946   PetscScalar    *vals,*svals;
1947   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1948   MPI_Status     status;
1949   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1950   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1951   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1952   PetscInt       i,nz,j,rstart,rend;
1953   int            fd;
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1958   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1959   if (!rank) {
1960     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1961     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1962     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1963   }
1964 
1965   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1966   M = header[1]; N = header[2]; nz = header[3];
1967 
1968   /*
1969        Handle case where matrix is stored on disk as a dense matrix
1970   */
1971   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1972     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1973     PetscFunctionReturn(0);
1974   }
1975 
1976   /* determine ownership of all rows */
1977   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1978   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1979   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1980   rowners[0] = 0;
1981   for (i=2; i<=size; i++) {
1982     rowners[i] += rowners[i-1];
1983   }
1984   rstart = rowners[rank];
1985   rend   = rowners[rank+1];
1986 
1987   /* distribute row lengths to all processors */
1988   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
1989   if (!rank) {
1990     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1991     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1992     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1993     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1994     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1995     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1996   } else {
1997     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1998   }
1999 
2000   if (!rank) {
2001     /* calculate the number of nonzeros on each processor */
2002     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2003     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2004     for (i=0; i<size; i++) {
2005       for (j=rowners[i]; j< rowners[i+1]; j++) {
2006         procsnz[i] += rowlengths[j];
2007       }
2008     }
2009     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2010 
2011     /* determine max buffer needed and allocate it */
2012     maxnz = 0;
2013     for (i=0; i<size; i++) {
2014       maxnz = PetscMax(maxnz,procsnz[i]);
2015     }
2016     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2017 
2018     /* read in my part of the matrix column indices  */
2019     nz = procsnz[0];
2020     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2021     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2022 
2023     /* read in everyone else's part and ship it off */
2024     for (i=1; i<size; i++) {
2025       nz   = procsnz[i];
2026       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2027       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2028     }
2029     ierr = PetscFree(cols);CHKERRQ(ierr);
2030   } else {
2031     /* determine buffer space needed for message */
2032     nz = 0;
2033     for (i=0; i<m; i++) {
2034       nz += ourlens[i];
2035     }
2036     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2037 
2038     /* receive message of column indices*/
2039     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2040     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2041     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"message length does not match expected number of column indices; file may be corrupt");
2042   }
2043 
2044   /* loop over local rows, determining the number of off-diagonal entries */
2045   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2046   jj = 0;
2047   for (i=0; i<m; i++) {
2048     for (j=0; j<ourlens[i]; j++) {
2049       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2050       jj++;
2051     }
2052   }
2053 
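       /* the on/off-diagonal split below appears to be vestigial from the AIJ loader:
          dense preallocation ignores the row lengths, and ourlens is restored right
          after the matrix is created so it can drive the MatSetValues() loops below */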
2054   /* create our matrix */
2055   for (i=0; i<m; i++) {
2056     ourlens[i] -= offlens[i];
2057   }
2058   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2059   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2060   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2061   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2062   A = *newmat;
2063   for (i=0; i<m; i++) {
2064     ourlens[i] += offlens[i];
2065   }
2066 
2067   if (!rank) {
2068     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2069 
2070     /* read in my part of the matrix numerical values  */
2071     nz = procsnz[0];
2072     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2073 
2074     /* insert into matrix */
2075     jj      = rstart;
2076     smycols = mycols;
2077     svals   = vals;
2078     for (i=0; i<m; i++) {
2079       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2080       smycols += ourlens[i];
2081       svals   += ourlens[i];
2082       jj++;
2083     }
2084 
2085     /* read in other processors and ship out */
2086     for (i=1; i<size; i++) {
2087       nz   = procsnz[i];
2088       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2089       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2090     }
2091     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2092   } else {
2093     /* receive numeric values */
2094     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2095 
2096     /* receive the message of values */
2097     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2098     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2099     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"message length does not match expected number of values; file may be corrupt");
2100 
2101     /* insert into matrix */
2102     jj      = rstart;
2103     smycols = mycols;
2104     svals   = vals;
2105     for (i=0; i<m; i++) {
2106       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2107       smycols += ourlens[i];
2108       svals   += ourlens[i];
2109       jj++;
2110     }
2111   }
2112   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2113   ierr = PetscFree(vals);CHKERRQ(ierr);
2114   ierr = PetscFree(mycols);CHKERRQ(ierr);
2115   ierr = PetscFree(rowners);CHKERRQ(ierr);
2116 
2117   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2118   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatEqual_MPIDense"
2124 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2125 {
2126   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2127   Mat            a,b;
2128   PetscTruth     flg;
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   a = matA->A;
2133   b = matB->A;
2134   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2135   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #if defined(PETSC_HAVE_PLAPACK)
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2143 /*@C
2144   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2145   Level: developer
2146 
2147 .keywords: Petsc, destroy, package, PLAPACK
2148 .seealso: PetscFinalize()
2149 @*/
2150 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2151 {
2152   PetscErrorCode ierr;
2153 
2154   PetscFunctionBegin;
2155   ierr = PLA_Finalize();CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2161 /*@C
2162   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2163   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2164 
2165   Input Parameter:
2166 .   comm - the communicator the matrix lives on
2167 
2168   Level: developer
2169 
2170    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2171   same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2172   PLAPACK is initialized on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created on them),
2173   those subcommunicators cannot overlap.
2174 
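       Example (illustrative): on 6 processes, a 2 x 3 process mesh could be requested with
     .vb
        -mat_plapack_nprows 2 -mat_plapack_npcols 3
     .ve
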
2175 .keywords: Petsc, initialize, package, PLAPACK
2176 .seealso: PetscInitializePackage(), PetscInitialize()
2177 @*/
2178 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2179 {
2180   PetscMPIInt    size;
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   if (!PLA_Initialized(PETSC_NULL)) {
2185 
2186     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2187     Plapack_nprows = 1;
2188     Plapack_npcols = size;
2189 
2190     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2191       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2192       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2193 #if defined(PETSC_USE_DEBUG)
2194       Plapack_ierror = 3;
2195 #else
2196       Plapack_ierror = 0;
2197 #endif
2198       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2199       if (Plapack_ierror) {
2200         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2201       } else {
2202         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2203       }
2204 
2205       Plapack_nb_alg = 0;
2206       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2207       if (Plapack_nb_alg) {
2208         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2209       }
2210     PetscOptionsEnd();
2211 
2212     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2213     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2214     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2215   }
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 #endif
2220