xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 7c4f633dc6bb6149cca88d301ead35a99e103cbb)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix returns the sequential
19               matrix that stores the local portion of the operator; for a MATSEQDENSE matrix it returns the matrix itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
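/*
   A minimal usage sketch (hypothetical caller, not part of the library):
   obtain the inner sequential matrix of an assembled MATMPIDENSE matrix.
   Each process stores its rows in full, so the local block is
   (local rows) x (global columns).
*/
#if 0
static PetscErrorCode ExampleGetLocalBlock(Mat A)
{
  PetscErrorCode ierr;
  Mat            Aloc;
  PetscInt       m,N;

  PetscFunctionBegin;
  ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr); /* returns a borrowed reference; do not destroy Aloc */
  ierr = MatGetSize(Aloc,&m,&N);CHKERRQ(ierr);          /* m = A->rmap->n, N = A->cmap->N */
  PetscFunctionReturn(0);
}
#endif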
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
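/*
   A minimal sketch of driving the routine above from user code (hypothetical
   caller): entries in locally owned rows go straight into the inner
   sequential matrix, while off-process rows are stashed and only shipped to
   their owners during assembly.
*/
#if 0
static PetscErrorCode ExampleAddEntry(Mat A,PetscInt i,PetscInt j,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* stashed if row i is not local */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* communicates the stash */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif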
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap->N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190   const PetscInt *irow,*icol;
191   PetscScalar    *av,*bv,*v = lmat->v;
192   Mat            newmat;
193 
194   PetscFunctionBegin;
195   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
196   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
198   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
199 
200   /* No parallel redistribution currently supported! Should really check each index set
201      to confirm that it is OK. Currently supports only submatrices with the same row
202      partitioning as the original matrix! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
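  /* v is column major with nlrows rows per column, so each requested global
     column icol[i] is located once and the requested rows are gathered from it */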
224   for (i=0; i<ncols; i++) {
225     av = v + nlrows*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
238   *B = newmat;
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreArray_MPIDense"
244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245 {
246   PetscFunctionBegin;
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253 {
254   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
255   MPI_Comm       comm = ((PetscObject)mat)->comm;
256   PetscErrorCode ierr;
257   PetscInt       nstash,reallocs;
258   InsertMode     addv;
259 
260   PetscFunctionBegin;
261   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
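  /* INSERT_VALUES and ADD_VALUES are distinct nonzero bits, so the bitwise OR
     below equals (ADD_VALUES|INSERT_VALUES) exactly when some processes
     inserted while others added */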
262   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
263   if (addv == (ADD_VALUES|INSERT_VALUES)) {
264     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265   }
266   mat->insertmode = addv; /* in case this processor had no cache */
267 
268   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
269   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
270   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
279   PetscErrorCode  ierr;
280   PetscInt        i,*row,*col,flg,j,rstart,ncols;
281   PetscMPIInt     n;
282   PetscScalar     *val;
283   InsertMode      addv=mat->insertmode;
284 
285   PetscFunctionBegin;
286   /*  wait on receives */
287   while (1) {
288     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
289     if (!flg) break;
290 
291     for (i=0; i<n;) {
292       /* Now identify the consecutive vals belonging to the same row */
293       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294       if (j < n) ncols = j-i;
295       else       ncols = n-i;
296       /* Now assemble all these values with a single function call */
297       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
298       i = j;
299     }
300   }
301   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
302 
303   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
304   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
305 
306   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatZeroEntries_MPIDense"
314 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315 {
316   PetscErrorCode ierr;
317   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
318 
319   PetscFunctionBegin;
320   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 /* The code does not handle the diagonal entries correctly unless the
325    matrix is square and the column and row ownerships are identical.
326    This is a BUG. The only way to fix it seems to be to access
327    mdn->A and mdn->B directly and not through the MatZeroRows()
328    routine.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatZeroRows_MPIDense"
332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333 {
334   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
335   PetscErrorCode ierr;
336   PetscInt       i,*owners = A->rmap->range;
337   PetscInt       *nprocs,j,idx,nsends;
338   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
339   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340   PetscInt       *lens,*lrows,*values;
341   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
342   MPI_Comm       comm = ((PetscObject)A)->comm;
343   MPI_Request    *send_waits,*recv_waits;
344   MPI_Status     recv_status,*send_status;
345   PetscTruth     found;
346 
347   PetscFunctionBegin;
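  /* Overview of the communication below: each process (1) bins its requested
     rows by owning process, (2) learns via PetscMaxSum() how many messages to
     expect and their maximum length, (3) posts the receives, (4) sends each
     owner its list of rows, and (5) converts the received global row numbers
     to local indices before zeroing them in the sequential matrix. */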
348   /*  first count number of contributors to each processor */
349   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
350   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
352   for (i=0; i<N; i++) {
353     idx = rows[i];
354     found = PETSC_FALSE;
355     for (j=0; j<size; j++) {
356       if (idx >= owners[j] && idx < owners[j+1]) {
357         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358       }
359     }
360     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361   }
362   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
363 
364   /* inform other processors of number of messages and max length*/
365   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
366 
367   /* post receives:   */
368   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
369   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
370   for (i=0; i<nrecvs; i++) {
371     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
372   }
373 
374   /* do sends:
375       1) starts[i] gives the starting index in svalues for stuff going to
376          the ith processor
377   */
378   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
379   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
380   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
381   starts[0]  = 0;
382   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383   for (i=0; i<N; i++) {
384     svalues[starts[owner[i]]++] = rows[i];
385   }
386 
387   starts[0] = 0;
388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389   count = 0;
390   for (i=0; i<size; i++) {
391     if (nprocs[2*i+1]) {
392       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
393     }
394   }
395   ierr = PetscFree(starts);CHKERRQ(ierr);
396 
397   base = owners[rank];
398 
399   /*  wait on receives */
400   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
401   source = lens + nrecvs;
402   count  = nrecvs; slen = 0;
403   while (count) {
404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
405     /* unpack receives into our local space */
406     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
407     source[imdex]  = recv_status.MPI_SOURCE;
408     lens[imdex]    = n;
409     slen += n;
410     count--;
411   }
412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
413 
414   /* move the data into the send scatter */
415   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
416   count = 0;
417   for (i=0; i<nrecvs; i++) {
418     values = rvalues + i*nmax;
419     for (j=0; j<lens[i]; j++) {
420       lrows[count++] = values[j] - base;
421     }
422   }
423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
424   ierr = PetscFree(lens);CHKERRQ(ierr);
425   ierr = PetscFree(owner);CHKERRQ(ierr);
426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
427 
428   /* actually zap the local rows */
429   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
430   ierr = PetscFree(lrows);CHKERRQ(ierr);
431 
432   /* wait on sends */
433   if (nsends) {
434     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
435     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
436     ierr = PetscFree(send_status);CHKERRQ(ierr);
437   }
438   ierr = PetscFree(send_waits);CHKERRQ(ierr);
439   ierr = PetscFree(svalues);CHKERRQ(ierr);
440 
441   PetscFunctionReturn(0);
442 }
443 
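/* In the MatMult* routines below each process stores its rows in full, so the
   forward scatter first gathers a complete copy of the input vector into
   mdn->lvec; the transpose variants instead form a full-length local product
   and combine the per-process contributions with a reverse ADD_VALUES
   scatter. */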
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMult_MPIDense"
446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447 {
448   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
449   PetscErrorCode ierr;
450 
451   PetscFunctionBegin;
452   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
454   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMultAdd_MPIDense"
460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
468   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTranspose_MPIDense"
474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475 {
476   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
477   PetscErrorCode ierr;
478   PetscScalar    zero = 0.0;
479 
480   PetscFunctionBegin;
481   ierr = VecSet(yy,zero);CHKERRQ(ierr);
482   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
483   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
497   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
498   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "MatGetDiagonal_MPIDense"
505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506 {
507   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
508   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
509   PetscErrorCode ierr;
510   PetscInt       len,i,n,m = A->rmap->n,radd;
511   PetscScalar    *x,zero = 0.0;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(v,zero);CHKERRQ(ierr);
515   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
516   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
517   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
519   radd = A->rmap->rstart*m;
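  /* aloc->v is column major with m local rows, so the global diagonal entry
     (rstart+i,rstart+i) is local row i of column rstart+i, i.e. offset
     rstart*m + i*m + i */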
520   for (i=0; i<len; i++) {
521     x[i] = aloc->v[radd + i*m + i];
522   }
523   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatDestroy_MPIDense"
529 PetscErrorCode MatDestroy_MPIDense(Mat mat)
530 {
531   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
532   PetscErrorCode ierr;
533 #if defined(PETSC_HAVE_PLAPACK)
534   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
535 #endif
536 
537   PetscFunctionBegin;
538 
539 #if defined(PETSC_USE_LOG)
540   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541 #endif
542   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
543   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
544   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
545   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
546 #if defined(PETSC_HAVE_PLAPACK)
547   if (lu) {
548     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
549     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
550     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
551 
552     if (lu->is_pla) {
553       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
554       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
555       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
556     }
557   }
558 #endif
559 
560   ierr = PetscFree(mdn);CHKERRQ(ierr);
561   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_MPIDense_Binary"
572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573 {
574   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
575   PetscErrorCode    ierr;
576   PetscViewerFormat format;
577   int               fd;
578   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
579   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
580   PetscScalar       *work,*v,*vv;
581   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
582   MPI_Status        status;
583 
584   PetscFunctionBegin;
585   if (mdn->size == 1) {
586     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
587   } else {
588     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
589     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
590     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
591 
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_NATIVE) {
594 
595       if (!rank) {
596         /* store the matrix as a dense matrix */
597         header[0] = MAT_FILE_COOKIE;
598         header[1] = mat->rmap->N;
599         header[2] = N;
600         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
602 
603         /* get largest work array needed for transposing array */
604         mmax = mat->rmap->n;
605         for (i=1; i<size; i++) {
606           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607         }
608         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
609 
610         /* write out local array, by rows */
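        /* a->v is column major; work[] re-packs it row major so that each row
           is contiguous in the file */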
611         m    = mat->rmap->n;
612         v    = a->v;
613         for (j=0; j<N; j++) {
614           for (i=0; i<m; i++) {
615             work[j + i*N] = *v++;
616           }
617         }
618         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
619         /* get largest work array to receive messages from other processes, excludes process zero */
620         mmax = 0;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
625         for (k = 1; k < size; k++) {
626           v    = vv;
627           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
629 
630           for (j = 0; j < N; j++) {
631             for (i = 0; i < m; i++) {
632               work[j + i*N] = *v++;
633             }
634           }
635           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         }
637         ierr = PetscFree(work);CHKERRQ(ierr);
638         ierr = PetscFree(vv);CHKERRQ(ierr);
639       } else {
640         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
641       }
642     } else {
643       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652 {
653   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
654   PetscErrorCode        ierr;
655   PetscMPIInt           size = mdn->size,rank = mdn->rank;
656   const PetscViewerType vtype;
657   PetscTruth            iascii,isdraw;
658   PetscViewer           sviewer;
659   PetscViewerFormat     format;
660 #if defined(PETSC_HAVE_PLAPACK)
661   Mat_Plapack           *lu=(Mat_Plapack*)(mat->spptr);
662 #endif
663 
664   PetscFunctionBegin;
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
677 #if defined(PETSC_HAVE_PLAPACK)
678       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",lu->nprows,lu->npcols);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d\n",lu->nb);CHKERRQ(ierr);
681       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
682       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
683 #endif
684       PetscFunctionReturn(0);
685     } else if (format == PETSC_VIEWER_ASCII_INFO) {
686       PetscFunctionReturn(0);
687     }
688   } else if (isdraw) {
689     PetscDraw  draw;
690     PetscTruth isnull;
691 
692     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
693     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
694     if (isnull) PetscFunctionReturn(0);
695   }
696 
697   if (size == 1) {
698     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
699   } else {
700     /* assemble the entire matrix onto first processor. */
701     Mat         A;
702     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
703     PetscInt    *cols;
704     PetscScalar *vals;
705 
706     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
707     if (!rank) {
708       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
709     } else {
710       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
711     }
712     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
713     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
714     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
715     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
716 
717     /* Copy the matrix ... This isn't the most efficient means,
718        but it's quick for now */
719     A->insertmode = INSERT_VALUES;
720     row = mat->rmap->rstart; m = mdn->A->rmap->n;
721     for (i=0; i<m; i++) {
722       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
723       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
724       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
725       row++;
726     }
727 
728     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
729     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
730     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
733     }
734     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
736     ierr = MatDestroy(A);CHKERRQ(ierr);
737   }
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIDense"
743 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
744 {
745   PetscErrorCode ierr;
746   PetscTruth     iascii,isbinary,isdraw,issocket;
747 
748   PetscFunctionBegin;
749 
750   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
753   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
754 
755   if (iascii || issocket || isdraw) {
756     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
757   } else if (isbinary) {
758     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
759   } else {
760     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
761   }
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "MatGetInfo_MPIDense"
767 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
768 {
769   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
770   Mat            mdn = mat->A;
771   PetscErrorCode ierr;
772   PetscReal      isend[5],irecv[5];
773 
774   PetscFunctionBegin;
775   info->block_size     = 1.0;
776   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
777   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
778   isend[3] = info->memory;  isend[4] = info->mallocs;
779   if (flag == MAT_LOCAL) {
780     info->nz_used      = isend[0];
781     info->nz_allocated = isend[1];
782     info->nz_unneeded  = isend[2];
783     info->memory       = isend[3];
784     info->mallocs      = isend[4];
785   } else if (flag == MAT_GLOBAL_MAX) {
786     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
787     info->nz_used      = irecv[0];
788     info->nz_allocated = irecv[1];
789     info->nz_unneeded  = irecv[2];
790     info->memory       = irecv[3];
791     info->mallocs      = irecv[4];
792   } else if (flag == MAT_GLOBAL_SUM) {
793     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
794     info->nz_used      = irecv[0];
795     info->nz_allocated = irecv[1];
796     info->nz_unneeded  = irecv[2];
797     info->memory       = irecv[3];
798     info->mallocs      = irecv[4];
799   }
800   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
801   info->fill_ratio_needed = 0;
802   info->factor_mallocs    = 0;
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNCT__
807 #define __FUNCT__ "MatSetOption_MPIDense"
808 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
809 {
810   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   switch (op) {
815   case MAT_NEW_NONZERO_LOCATIONS:
816   case MAT_NEW_NONZERO_LOCATION_ERR:
817   case MAT_NEW_NONZERO_ALLOCATION_ERR:
818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
819     break;
820   case MAT_ROW_ORIENTED:
821     a->roworiented = flg;
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_NEW_DIAGONALS:
825   case MAT_USE_HASH_TABLE:
826     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
827     break;
828   case MAT_IGNORE_OFF_PROC_ENTRIES:
829     a->donotstash = flg;
830     break;
831   case MAT_SYMMETRIC:
832   case MAT_STRUCTURALLY_SYMMETRIC:
833   case MAT_HERMITIAN:
834   case MAT_SYMMETRY_ETERNAL:
835   case MAT_IGNORE_LOWER_TRIANGULAR:
836     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
837     break;
838   default:
839     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 
845 #undef __FUNCT__
846 #define __FUNCT__ "MatDiagonalScale_MPIDense"
847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
848 {
849   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
850   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
851   PetscScalar    *l,*r,x,*v;
852   PetscErrorCode ierr;
853   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
854 
855   PetscFunctionBegin;
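  /* Row scaling is purely local because each process owns its rows in full;
     column scaling must first gather the distributed right vector into
     mdn->lvec, since every process stores all N columns of its rows. */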
856   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
857   if (ll) {
858     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
859     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
860     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
861     for (i=0; i<m; i++) {
862       x = l[i];
863       v = mat->v + i;
864       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
865     }
866     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
867     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
868   }
869   if (rr) {
870     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
871     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
872     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
875     for (i=0; i<n; i++) {
876       x = r[i];
877       v = mat->v + i*m;
878       for (j=0; j<m; j++) { (*v++) *= x;}
879     }
880     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
881     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
882   }
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatNorm_MPIDense"
888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
889 {
890   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
891   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
892   PetscErrorCode ierr;
893   PetscInt       i,j;
894   PetscReal      sum = 0.0;
895   PetscScalar    *v = mat->v;
896 
897   PetscFunctionBegin;
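  /* Frobenius norm: global sum of squares reduced with MPI_SUM; 1-norm:
     local per-column absolute sums reduced with MPI_SUM, then maximized over
     columns; infinity norm: the local row-sum maxima are reduced with
     MPI_MAX, which is valid because every row is stored entirely on one
     process. */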
898   if (mdn->size == 1) {
899     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
900   } else {
901     if (type == NORM_FROBENIUS) {
902       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
903 #if defined(PETSC_USE_COMPLEX)
904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
905 #else
906         sum += (*v)*(*v); v++;
907 #endif
908       }
909       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
910       *nrm = sqrt(*nrm);
911       ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
912     } else if (type == NORM_1) {
913       PetscReal *tmp,*tmp2;
914       ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
915       tmp2 = tmp + A->cmap->N;
916       ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
917       *nrm = 0.0;
918       v = mat->v;
919       for (j=0; j<mdn->A->cmap->n; j++) {
920         for (i=0; i<mdn->A->rmap->n; i++) {
921           tmp[j] += PetscAbsScalar(*v);  v++;
922         }
923       }
924       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
925       for (j=0; j<A->cmap->N; j++) {
926         if (tmp2[j] > *nrm) *nrm = tmp2[j];
927       }
928       ierr = PetscFree(tmp);CHKERRQ(ierr);
929       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
930     } else if (type == NORM_INFINITY) { /* max row norm */
931       PetscReal ntemp;
932       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
933       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
934     } else {
935       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
936     }
937   }
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatTranspose_MPIDense"
943 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
944 {
945   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
946   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
947   Mat            B;
948   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
949   PetscErrorCode ierr;
950   PetscInt       j,i;
951   PetscScalar    *v;
952 
953   PetscFunctionBegin;
954   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
955   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
956     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
957     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
958     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
959     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
960   } else {
961     B = *matout;
962   }
963 
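  /* The local block is column major and n = N here (each process stores all
     global columns), so local column j is scattered as global row j of B,
     with global column indices rstart .. rstart+m-1 supplied through rwork. */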
964   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
965   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
966   for (i=0; i<m; i++) rwork[i] = rstart + i;
967   for (j=0; j<n; j++) {
968     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
969     v   += m;
970   }
971   ierr = PetscFree(rwork);CHKERRQ(ierr);
972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
974   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
975     *matout = B;
976   } else {
977     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
981 
982 #include "petscblaslapack.h"
983 #undef __FUNCT__
984 #define __FUNCT__ "MatScale_MPIDense"
985 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
986 {
987   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
988   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
989   PetscScalar    oalpha = alpha;
990   PetscErrorCode ierr;
991   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
992 
993   PetscFunctionBegin;
994   BLASscal_(&nz,&oalpha,a->v,&one);
995   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
996   PetscFunctionReturn(0);
997 }
998 
999 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1003 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1004 {
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
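/*
   A minimal creation sketch (hypothetical user code): passing PETSC_NULL as
   the data pointer to MatCreateMPIDense() lets PETSc allocate the local
   array, which is exactly the default this routine falls back to.
*/
#if 0
static PetscErrorCode ExampleCreateDense(MPI_Comm comm,PetscInt m,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateMPIDense(comm,m,PETSC_DECIDE,PETSC_DECIDE,N,PETSC_NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif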
1012 #if defined(PETSC_HAVE_PLAPACK)
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1016 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1017 {
1018   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1019   PetscErrorCode ierr;
1020   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1021   PetscScalar    *array;
1022   PetscReal      one = 1.0;
1023 
1024   PetscFunctionBegin;
1025   /* Copy A into F->lu->A */
1026   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1027   ierr = PLA_API_begin();CHKERRQ(ierr);
1028   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1029   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1030   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1031   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1032   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1033   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1034   ierr = PLA_API_end();CHKERRQ(ierr);
1035   lu->rstart = rstart;
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1041 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1042 {
1043   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1044   PetscErrorCode ierr;
1045   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1046   PetscScalar    *array;
1047   PetscReal      one = 1.0;
1048 
1049   PetscFunctionBegin;
1050   /* Copy F into A->lu->A */
1051   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1052   ierr = PLA_API_begin();CHKERRQ(ierr);
1053   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1054   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1055   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1056   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1057   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1058   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1059   ierr = PLA_API_end();CHKERRQ(ierr);
1060   lu->rstart = rstart;
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1066 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1067 {
1068   PetscErrorCode ierr;
1069   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1070   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1071   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1072   PLA_Obj        alpha = NULL,beta = NULL;
1073 
1074   PetscFunctionBegin;
1075   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1076   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1077 
1078   /*
1079   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1080   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1081   */
1082 
1083   /* do the multiply in PLA  */
1084   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1085   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1086   CHKMEMQ;
1087 
1088   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1089   CHKMEMQ;
1090   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1091   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1092 
1093   /*
1094   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1095   */
1096   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1102 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1103 {
1104   PetscErrorCode ierr;
1105   PetscInt       m=A->rmap->n,n=B->cmap->n;
1106   Mat            Cmat;
1107 
1108   PetscFunctionBegin;
1109   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1110   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1111   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1112   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1113   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1114   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1115   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1116 
1117   *C = Cmat;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1123 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1124 {
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   if (scall == MAT_INITIAL_MATRIX){
1129     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1130   }
1131   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatSolve_MPIDense"
1137 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1138 {
1139   MPI_Comm       comm = ((PetscObject)A)->comm;
1140   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1141   PetscErrorCode ierr;
1142   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1143   PetscScalar    *array;
1144   PetscReal      one = 1.0;
1145   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1146   PLA_Obj        v_pla = NULL;
1147   PetscScalar    *loc_buf;
1148   Vec            loc_x;
1149 
1150   PetscFunctionBegin;
1151   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1152   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1153 
1154   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1155   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1156   PLA_Obj_set_to_zero(v_pla);
1157 
1158   /* Copy b into rhs_pla */
1159   PLA_API_begin();
1160   PLA_Obj_API_open(v_pla);
1161   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1162   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1163   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1164   PLA_Obj_API_close(v_pla);
1165   PLA_API_end();
1166 
1167   if (A->factor == MAT_FACTOR_LU){
1168     /* Apply the permutations to the right hand sides */
1169     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1170 
1171     /* Solve L y = b, overwriting b with y */
1172     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1173 
1174     /* Solve U x = y (=b), overwriting b with x */
1175     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1176   } else { /*MAT_FACTOR_CHOLESKY */
1177     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1178     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1179                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1180   }
1181 
1182   /* Copy PLAPACK x into Petsc vector x  */
1183   PLA_Obj_local_length(v_pla, &loc_m);
1184   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1185   PLA_Obj_local_stride(v_pla, &loc_stride);
1186   /*
1187     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1188   */
1189   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1190   if (!lu->pla_solved){
1191 
1192     PLA_Temp_comm_row_info(lu->templ,&lu->comm_2d,&r_rank,&r_nproc);
1193     PLA_Temp_comm_col_info(lu->templ,&lu->comm_2d,&c_rank,&c_nproc);
1194     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1195 
1196     /* Create index sets and scatter context for the VecScatter */
1197     PLA_Obj_local_length(v_pla, &loc_m);
1198     PLA_Obj_local_stride(v_pla, &loc_stride);
1199     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1200     idx_petsc = idx_pla + loc_m;
1201 
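    /* PLAPACK stores the vector block cyclically, in blocks of lu->nb, over
       the r_nproc x c_nproc process mesh; reconstruct block by block the
       global PETSc index of each locally stored entry */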
1202     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1203     for (i=0; i<loc_m; i+=lu->nb){
1204       j = 0;
1205       while (j < lu->nb && i+j < loc_m){
1206         idx_petsc[i+j] = rstart + j; j++;
1207       }
1208       rstart += size*lu->nb;
1209     }
1210 
1211     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1212 
1213     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1214     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1215     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1216     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1217   }
1218   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1219   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220 
1221   /* Free data */
1222   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1223   PLA_Obj_free(&v_pla);
1224 
1225   lu->pla_solved = PETSC_TRUE;
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1231 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1232 {
1233   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1234   PetscErrorCode ierr;
1235   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1236   PetscInt       info_pla=0;
1237   PetscScalar    *array,one = 1.0;
1238 
1239   PetscFunctionBegin;
1240   if (lu->mstruct == SAME_NONZERO_PATTERN){
1241     PLA_Obj_free(&lu->A);
1242     PLA_Obj_free (&lu->pivots);
1243   }
1244   /* Create PLAPACK matrix object */
1245   lu->A = NULL; lu->pivots = NULL;
1246   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1247   PLA_Obj_set_to_zero(lu->A);
1248   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1249 
1250   /* Copy A into lu->A */
1251   PLA_API_begin();
1252   PLA_Obj_API_open(lu->A);
1253   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1254   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1255   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1256   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1257   PLA_Obj_API_close(lu->A);
1258   PLA_API_end();
1259 
1260   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1261   info_pla = PLA_LU(lu->A,lu->pivots);
1262   if (info_pla != 0)
1263     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1264 
1265   lu->CleanUpPlapack = PETSC_TRUE;
1266   lu->rstart         = rstart;
1267   lu->mstruct        = SAME_NONZERO_PATTERN;
1268   F->ops->solve      = MatSolve_MPIDense;
1269   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1275 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1276 {
1277   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1278   PetscErrorCode ierr;
1279   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1280   PetscInt       info_pla=0;
1281   PetscScalar    *array,one = 1.0;
1282 
1283   PetscFunctionBegin;
1284   if (lu->mstruct == SAME_NONZERO_PATTERN){
1285     PLA_Obj_free(&lu->A);
1286   }
1287   /* Create PLAPACK matrix object */
1288   lu->A      = NULL;
1289   lu->pivots = NULL;
1290   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1291 
1292   /* Copy A into lu->A */
1293   PLA_API_begin();
1294   PLA_Obj_API_open(lu->A);
1295   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1296   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1297   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1298   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1299   PLA_Obj_API_close(lu->A);
1300   PLA_API_end();
1301 
1302   /* Factor P A -> Chol */
1303   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1304   if (info_pla != 0)
1305     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1306 
1307   lu->CleanUpPlapack = PETSC_TRUE;
1308   lu->rstart         = rstart;
1309   lu->mstruct        = SAME_NONZERO_PATTERN;
1310   F->ops->solve      = MatSolve_MPIDense;
1311   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatFactorSymbolic_Plapack_Private"
1317 PetscErrorCode MatFactorSymbolic_Plapack_Private(Mat F,Mat A,const MatFactorInfo *info)
1318 {
1319   Mat            B = F;
1320   Mat_Plapack    *lu;
1321   PetscErrorCode ierr;
1322   PetscInt       M=A->rmap->N;
1323   MPI_Comm       comm=((PetscObject)A)->comm,comm_2d;
1324   PetscMPIInt    size;
1325   PetscInt       ierror;
1326 
1327   PetscFunctionBegin;
1328   lu = (Mat_Plapack*)(B->spptr);
1329 
1330   /* Set default Plapack parameters */
1331   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1332   lu->nprows = 1; lu->npcols = size;
1333   ierror = 0;
1334   lu->nb     = M/size;
1335   if (M - lu->nb*size) lu->nb++; /* round up so the blocks cover all of M without cyclic distribution */
1336 
1337   /* Set runtime options */
1338   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1339   ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",lu->nprows,&lu->nprows,PETSC_NULL);CHKERRQ(ierr);
1340   ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",lu->npcols,&lu->npcols,PETSC_NULL);CHKERRQ(ierr);
1341 
1342   ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
1344   if (ierror){
1345     PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
1346   } else {
1347     PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
1348   }
1349   lu->ierror = ierror;
1350 
1351   lu->nb_alg = 0;
1352   ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
1353   if (lu->nb_alg){
1354     pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);
1355   }
1356   PetscOptionsEnd();
1357 
1358 
1359   /* Create a 2D communicator */
1360   PLA_Comm_1D_to_2D(comm,lu->nprows,lu->npcols,&comm_2d);
1361   lu->comm_2d = comm_2d;
1362 
1363   /* Create object distribution template */
1364   lu->templ = NULL;
1365   PLA_Temp_create(lu->nb, 0, &lu->templ);
1366 
1367   /* Use suggested nb_alg if it is not provided by user */
1368   if (lu->nb_alg == 0){
1369     PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);
1370     pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);
1371   }
1372 
1373   /* Set the datatype */
1374 #if defined(PETSC_USE_COMPLEX)
1375   lu->datatype = MPI_DOUBLE_COMPLEX;
1376 #else
1377   lu->datatype = MPI_DOUBLE;
1378 #endif
1379 
1380   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1381   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1382   lu->CleanUpPlapack = PETSC_TRUE;
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 /* Note the Petsc perm permutation is ignored */
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1389 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1390 {
1391   PetscErrorCode ierr;
1392   PetscTruth     issymmetric,set;
1393 
1394   PetscFunctionBegin;
1395   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1396   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1397   ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr);
1398   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 /* Note the Petsc r and c permutations are ignored */
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1405 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1406 {
1407   PetscErrorCode ierr;
1408   PetscInt       M = A->rmap->N;
1409   Mat_Plapack    *lu;
1410 
1411   PetscFunctionBegin;
1412   ierr = MatFactorSymbolic_Plapack_Private(F,A,info);CHKERRQ(ierr);
1413   lu = (Mat_Plapack*)F->spptr;
1414   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1415   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1421 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1422 {
1423   PetscErrorCode ierr;
1424   Mat_Plapack    *lu;
1425   PetscMPIInt    size;
1426   PetscInt       M=A->rmap->N;
1427 
1428   PetscFunctionBegin;
1429   /* Create the factorization matrix */
1430   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1431   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1432   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1433   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1434   (*F)->spptr = (void*)lu;
1435 
1436   /* Set default Plapack parameters */
1437   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1438   lu->nb     = M/size;
1439   if (M - lu->nb*size) lu->nb++; /* round nb up so the distribution is not cyclic */
1440 
1441   /* Set runtime options */
1442   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1443     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1444   PetscOptionsEnd();
1445 
1446   /* Create object distribution template */
1447   lu->templ = NULL;
1448   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1449 
1450   /* Set the datatype */
1451 #if defined(PETSC_USE_COMPLEX)
1452   lu->datatype = MPI_DOUBLE_COMPLEX;
1453 #else
1454   lu->datatype = MPI_DOUBLE;
1455 #endif
1456 
1457   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1458 
1459 
1460   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1461 
1462   if (ftype == MAT_FACTOR_LU) {
1463     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1464   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1465     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1466   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1467   (*F)->factor = ftype;
1468   PetscFunctionReturn(0);
1469 }
1470 #endif
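/*
   Illustrative sketch (assumes PETSc was configured with PLAPACK): using the
   factorization registered above through the generic MatGetFactor() interface.
   A, b, and x are assumed to be an assembled MATMPIDENSE matrix and two
   conforming vectors; the orderings are ignored by this implementation (see
   the notes above), so PETSC_NULL is passed; error checking is abbreviated.

      Mat           F;
      MatFactorInfo info;

      ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
      ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
      ierr = MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);CHKERRQ(ierr);
      ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
      ierr = MatSolve(F,b,x);CHKERRQ(ierr);
      ierr = MatDestroy(F);CHKERRQ(ierr);
*/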
1471 
1472 /* -------------------------------------------------------------------*/
1473 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1474        MatGetRow_MPIDense,
1475        MatRestoreRow_MPIDense,
1476        MatMult_MPIDense,
1477 /* 4*/ MatMultAdd_MPIDense,
1478        MatMultTranspose_MPIDense,
1479        MatMultTransposeAdd_MPIDense,
1480        0,
1481        0,
1482        0,
1483 /*10*/ 0,
1484        0,
1485        0,
1486        0,
1487        MatTranspose_MPIDense,
1488 /*15*/ MatGetInfo_MPIDense,
1489        MatEqual_MPIDense,
1490        MatGetDiagonal_MPIDense,
1491        MatDiagonalScale_MPIDense,
1492        MatNorm_MPIDense,
1493 /*20*/ MatAssemblyBegin_MPIDense,
1494        MatAssemblyEnd_MPIDense,
1495        0,
1496        MatSetOption_MPIDense,
1497        MatZeroEntries_MPIDense,
1498 /*25*/ MatZeroRows_MPIDense,
1499        0,
1500        0,
1501        0,
1502        0,
1503 /*30*/ MatSetUpPreallocation_MPIDense,
1504        0,
1505        0,
1506        MatGetArray_MPIDense,
1507        MatRestoreArray_MPIDense,
1508 /*35*/ MatDuplicate_MPIDense,
1509        0,
1510        0,
1511        0,
1512        0,
1513 /*40*/ 0,
1514        MatGetSubMatrices_MPIDense,
1515        0,
1516        MatGetValues_MPIDense,
1517        0,
1518 /*45*/ 0,
1519        MatScale_MPIDense,
1520        0,
1521        0,
1522        0,
1523 /*50*/ 0,
1524        0,
1525        0,
1526        0,
1527        0,
1528 /*55*/ 0,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*60*/ MatGetSubMatrix_MPIDense,
1534        MatDestroy_MPIDense,
1535        MatView_MPIDense,
1536        0,
1537        0,
1538 /*65*/ 0,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*70*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*75*/ 0,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*80*/ 0,
1554        0,
1555        0,
1556        0,
1557 /*84*/ MatLoad_MPIDense,
1558        0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*90*/
1564 #if defined(PETSC_HAVE_PLAPACK)
1565        MatMatMult_MPIDense_MPIDense,
1566        MatMatMultSymbolic_MPIDense_MPIDense,
1567        MatMatMultNumeric_MPIDense_MPIDense,
1568 #else
1569        0,
1570        0,
1571        0,
1572 #endif
1573        0,
1574 /*95*/ 0,
1575        0,
1576        0,
1577        0};
1578 
1579 EXTERN_C_BEGIN
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1582 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1583 {
1584   Mat_MPIDense   *a;
1585   PetscErrorCode ierr;
1586 
1587   PetscFunctionBegin;
1588   mat->preallocated = PETSC_TRUE;
1589   /* Note: for now, when data is specified, we assume that the user correctly
1590    allocates the local dense storage space; error checking should be added. */
1591 
1592   a    = (Mat_MPIDense*)mat->data;
1593   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1594   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1595   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1596   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1597   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 EXTERN_C_END
1601 
1602 /*MC
1603    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1604 
1605    Options Database Keys:
1606 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1607 
1608   Level: beginner
1609 
1610   MATMPIDENSE matrices may use the direct solvers LU and Cholesky
1611   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
1612   (run config/configure.py with the option --download-plapack).
1613 
1614 
1615   Options Database Keys:
1616 + -mat_plapack_nprows <n> - number of rows in processor partition
1617 . -mat_plapack_npcols <n> - number of columns in processor partition
1618 . -mat_plapack_nb <n> - block size of template vector
1619 . -mat_plapack_nb_alg <n> - algorithmic block size
1620 - -mat_plapack_ckerror <n> - error checking flag
1621 
1622 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1623 M*/
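/*
   Illustrative sketch: selecting this type at runtime via the options
   database.  comm, m, n, M, N are assumed to be defined and error checking
   is abbreviated.

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);   running with -mat_type mpidense selects this type
*/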
1624 
1625 EXTERN_C_BEGIN
1626 #undef __FUNCT__
1627 #define __FUNCT__ "MatCreate_MPIDense"
1628 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1629 {
1630   Mat_MPIDense   *a;
1631   PetscErrorCode ierr;
1632 
1633   PetscFunctionBegin;
1634   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1635   mat->data         = (void*)a;
1636   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1637   mat->mapping      = 0;
1638 
1639   mat->insertmode = NOT_SET_VALUES;
1640   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1641   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1642 
1643   mat->rmap->bs = mat->cmap->bs = 1;
1644   ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
1645   ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
1646   a->nvec = mat->cmap->n;
1647 
1648   /* build cache for off array entries formed */
1649   a->donotstash = PETSC_FALSE;
1650   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1651 
1652   /* stuff used for matrix vector multiply */
1653   a->lvec        = 0;
1654   a->Mvctx       = 0;
1655   a->roworiented = PETSC_TRUE;
1656 
1657   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1658                                      "MatGetDiagonalBlock_MPIDense",
1659                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1661                                      "MatMPIDenseSetPreallocation_MPIDense",
1662                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1664                                      "MatMatMult_MPIAIJ_MPIDense",
1665                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1667                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1668                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1670                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1671                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1672 #if defined(PETSC_HAVE_PLAPACK)
1673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_petsc_C",
1674                                      "MatGetFactor_mpidense_petsc",
1675                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1677   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1679 
1680 #endif
1681   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1682 
1683   PetscFunctionReturn(0);
1684 }
1685 EXTERN_C_END
1686 
1687 /*MC
1688    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1689 
1690    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1691    and MATMPIDENSE otherwise.
1692 
1693    Options Database Keys:
1694 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1695 
1696   Level: beginner
1697 
1698 
1699 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1700 M*/
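/*
   Illustrative sketch: since MATDENSE dispatches on the communicator size,
   the same code works sequentially and in parallel.  M and N are assumed to
   be defined; error checking is abbreviated.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);   becomes MATSEQDENSE on one process, MATMPIDENSE otherwise
*/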
1701 
1702 EXTERN_C_BEGIN
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatCreate_Dense"
1705 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1706 {
1707   PetscErrorCode ierr;
1708   PetscMPIInt    size;
1709 
1710   PetscFunctionBegin;
1711   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1712   if (size == 1) {
1713     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1714   } else {
1715     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 EXTERN_C_END
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1723 /*@C
1724    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1725 
1726    Not collective
1727 
1728    Input Parameters:
1729 +  A - the matrix
1730 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1731    to control all matrix memory allocation.
1732 
1733    Notes:
1734    The dense format is fully compatible with standard Fortran 77
1735    storage by columns.
1736 
1737    The data input variable is intended primarily for Fortran programmers
1738    who wish to allocate their own matrix memory space.  Most users should
1739    set data=PETSC_NULL.
1740 
1741    Level: intermediate
1742 
1743 .keywords: matrix,dense, parallel
1744 
1745 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1746 @*/
1747 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1748 {
1749   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1750 
1751   PetscFunctionBegin;
1752   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1753   if (f) {
1754     ierr = (*f)(mat,data);CHKERRQ(ierr);
1755   }
1756   PetscFunctionReturn(0);
1757 }
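/*
   Illustrative sketch: supplying user-allocated storage for the local rows.
   The array must be m x N in column (Fortran) order, where m is the number
   of local rows and N the number of global columns; most users pass
   PETSC_NULL instead.  Error checking is abbreviated.

      PetscScalar *mydata;
      ierr = PetscMalloc(m*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/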
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatCreateMPIDense"
1761 /*@C
1762    MatCreateMPIDense - Creates a parallel matrix in dense format.
1763 
1764    Collective on MPI_Comm
1765 
1766    Input Parameters:
1767 +  comm - MPI communicator
1768 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1769 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1770 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1771 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1772 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1773    to control all matrix memory allocation.
1774 
1775    Output Parameter:
1776 .  A - the matrix
1777 
1778    Notes:
1779    The dense format is fully compatible with standard Fortran 77
1780    storage by columns.
1781 
1782    The data input variable is intended primarily for Fortran programmers
1783    who wish to allocate their own matrix memory space.  Most users should
1784    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1785 
1786    The user MUST specify either the local or global matrix dimensions
1787    (possibly both).
1788 
1789    Level: intermediate
1790 
1791 .keywords: matrix,dense, parallel
1792 
1793 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1794 @*/
1795 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1796 {
1797   PetscErrorCode ierr;
1798   PetscMPIInt    size;
1799 
1800   PetscFunctionBegin;
1801   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1802   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1803   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1804   if (size > 1) {
1805     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1806     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1809     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
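/*
   Illustrative sketch: creating a distributed dense matrix with PETSc-managed
   storage; M and N are assumed to be defined and error checking is
   abbreviated.

      Mat A;
      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/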
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatDuplicate_MPIDense"
1816 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1817 {
1818   Mat            mat;
1819   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1820   PetscErrorCode ierr;
1821 
1822   PetscFunctionBegin;
1823   *newmat       = 0;
1824   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1825   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1826   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1827   a                 = (Mat_MPIDense*)mat->data;
1828   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1829   mat->factor       = A->factor;
1830   mat->assembled    = PETSC_TRUE;
1831   mat->preallocated = PETSC_TRUE;
1832 
1833   mat->rmap->rstart     = A->rmap->rstart;
1834   mat->rmap->rend       = A->rmap->rend;
1835   a->size              = oldmat->size;
1836   a->rank              = oldmat->rank;
1837   mat->insertmode      = NOT_SET_VALUES;
1838   a->nvec              = oldmat->nvec;
1839   a->donotstash        = oldmat->donotstash;
1840 
1841   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1842   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1843   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1844 
1845   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1846   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1847   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1848 
1849   *newmat = mat;
1850   PetscFunctionReturn(0);
1851 }
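/*
   Illustrative sketch: MatDuplicate() uses the routine above for MPIDENSE
   matrices; MAT_COPY_VALUES also copies the numerical entries, while
   MAT_DO_NOT_COPY_VALUES only replicates the layout.  Error checking is
   abbreviated.

      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/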
1852 
1853 #include "petscsys.h"
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1857 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1858 {
1859   PetscErrorCode ierr;
1860   PetscMPIInt    rank,size;
1861   PetscInt       *rowners,i,m,nz,j;
1862   PetscScalar    *array,*vals,*vals_ptr;
1863   MPI_Status     status;
1864 
1865   PetscFunctionBegin;
1866   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1867   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1868 
1869   /* determine ownership of all rows */
1870   m          = M/size + ((M % size) > rank);
1871   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1872   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1873   rowners[0] = 0;
1874   for (i=2; i<=size; i++) {
1875     rowners[i] += rowners[i-1];
1876   }
1877 
1878   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1879   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1880   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1881   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1882   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1883 
1884   if (!rank) {
1885     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1886 
1887     /* read in my part of the matrix numerical values  */
1888     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1889 
1890     /* insert into matrix by row (this is why we cannot read directly into the array) */
1891     vals_ptr = vals;
1892     for (i=0; i<m; i++) {
1893       for (j=0; j<N; j++) {
1894         array[i + j*m] = *vals_ptr++;
1895       }
1896     }
1897 
1898     /* read in other processors and ship out */
1899     for (i=1; i<size; i++) {
1900       nz   = (rowners[i+1] - rowners[i])*N;
1901       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1902       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1903     }
1904   } else {
1905     /* receive numeric values */
1906     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1907 
1908     /* receive message of values */
1909     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1910 
1911     /* insert into matrix by row (this is why we cannot read directly into the array) */
1912     vals_ptr = vals;
1913     for (i=0; i<m; i++) {
1914       for (j=0; j<N; j++) {
1915         array[i + j*m] = *vals_ptr++;
1916       }
1917     }
1918   }
1919   ierr = PetscFree(rowners);CHKERRQ(ierr);
1920   ierr = PetscFree(vals);CHKERRQ(ierr);
1921   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1922   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
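/*
   Sketch of the indexing used above: the file stores each local row
   contiguously, while the dense array is stored by columns (Fortran order),
   so entry (i,j) of the local m x N block lands at array[i + j*m]:

      for (i=0; i<m; i++) {
        for (j=0; j<N; j++) {
          array[i + j*m] = vals[i*N + j];
        }
      }
*/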
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatLoad_MPIDense"
1928 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1929 {
1930   Mat            A;
1931   PetscScalar    *vals,*svals;
1932   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1933   MPI_Status     status;
1934   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1935   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1936   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1937   PetscInt       i,nz,j,rstart,rend;
1938   int            fd;
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1943   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1944   if (!rank) {
1945     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1946     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1947     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1948   }
1949 
1950   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1951   M = header[1]; N = header[2]; nz = header[3];
1952 
1953   /*
1954        Handle case where matrix is stored on disk as a dense matrix
1955   */
1956   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1957     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1958     PetscFunctionReturn(0);
1959   }
1960 
1961   /* determine ownership of all rows */
1962   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1963   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1964   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1965   rowners[0] = 0;
1966   for (i=2; i<=size; i++) {
1967     rowners[i] += rowners[i-1];
1968   }
1969   rstart = rowners[rank];
1970   rend   = rowners[rank+1];
1971 
1972   /* distribute row lengths to all processors */
1973   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1974   offlens = ourlens + (rend-rstart);
1975   if (!rank) {
1976     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1977     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1978     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1979     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1980     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1981     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1982   } else {
1983     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1984   }
1985 
1986   if (!rank) {
1987     /* calculate the number of nonzeros on each processor */
1988     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1989     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1990     for (i=0; i<size; i++) {
1991       for (j=rowners[i]; j< rowners[i+1]; j++) {
1992         procsnz[i] += rowlengths[j];
1993       }
1994     }
1995     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1996 
1997     /* determine max buffer needed and allocate it */
1998     maxnz = 0;
1999     for (i=0; i<size; i++) {
2000       maxnz = PetscMax(maxnz,procsnz[i]);
2001     }
2002     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2003 
2004     /* read in my part of the matrix column indices  */
2005     nz = procsnz[0];
2006     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2007     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2008 
2009     /* read in everyone else's part and ship it off */
2010     for (i=1; i<size; i++) {
2011       nz   = procsnz[i];
2012       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2013       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2014     }
2015     ierr = PetscFree(cols);CHKERRQ(ierr);
2016   } else {
2017     /* determine buffer space needed for message */
2018     nz = 0;
2019     for (i=0; i<m; i++) {
2020       nz += ourlens[i];
2021     }
2022     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2023 
2024     /* receive message of column indices */
2025     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2026     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2027     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2028   }
2029 
2030   /* loop over local rows, determining number of off-diagonal entries */
2031   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2032   jj = 0;
2033   for (i=0; i<m; i++) {
2034     for (j=0; j<ourlens[i]; j++) {
2035       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2036       jj++;
2037     }
2038   }
2039 
2040   /* create our matrix */
2041   for (i=0; i<m; i++) {
2042     ourlens[i] -= offlens[i];
2043   }
2044   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2045   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2046   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2047   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2048   A = *newmat;
2049   for (i=0; i<m; i++) {
2050     ourlens[i] += offlens[i];
2051   }
2052 
2053   if (!rank) {
2054     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2055 
2056     /* read in my part of the matrix numerical values  */
2057     nz = procsnz[0];
2058     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2059 
2060     /* insert into matrix */
2061     jj      = rstart;
2062     smycols = mycols;
2063     svals   = vals;
2064     for (i=0; i<m; i++) {
2065       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2066       smycols += ourlens[i];
2067       svals   += ourlens[i];
2068       jj++;
2069     }
2070 
2071     /* read in other processors and ship out */
2072     for (i=1; i<size; i++) {
2073       nz   = procsnz[i];
2074       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2075       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2076     }
2077     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2078   } else {
2079     /* receive numeric values */
2080     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2081 
2082     /* receive message of values */
2083     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2084     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2085     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2086 
2087     /* insert into matrix */
2088     jj      = rstart;
2089     smycols = mycols;
2090     svals   = vals;
2091     for (i=0; i<m; i++) {
2092       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2093       smycols += ourlens[i];
2094       svals   += ourlens[i];
2095       jj++;
2096     }
2097   }
2098   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2099   ierr = PetscFree(vals);CHKERRQ(ierr);
2100   ierr = PetscFree(mycols);CHKERRQ(ierr);
2101   ierr = PetscFree(rowners);CHKERRQ(ierr);
2102 
2103   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2104   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatEqual_MPIDense"
2110 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2111 {
2112   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2113   Mat            a,b;
2114   PetscTruth     flg;
2115   PetscErrorCode ierr;
2116 
2117   PetscFunctionBegin;
2118   a = matA->A;
2119   b = matB->A;
2120   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2121   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2122   PetscFunctionReturn(0);
2123 }
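/*
   Illustrative sketch: the routine above compares the local blocks and
   combines the results with a logical AND, so every process receives the
   same answer.  Error checking is abbreviated.

      PetscTruth eq;
      ierr = MatEqual(A,B,&eq);CHKERRQ(ierr);
      if (!eq) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices differ");
*/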
2124 
2125 #if defined(PETSC_HAVE_PLAPACK)
2126 
2127 #undef __FUNCT__
2128 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2129 /*@C
2130   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2131   Level: developer
2132 
2133 .keywords: Petsc, destroy, package, PLAPACK
2134 .seealso: PetscFinalize()
2135 @*/
2136 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2137 {
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = PLA_Finalize();CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2147 /*@C
2148   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2149   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2150 
2151   Input Parameter:
2152 .   comm - the communicator the matrix lives on
2153 
2154   Level: developer
2155 
2156    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2157   same communicator (because there is some global state that is initialized and used for all matrices). In addition, if
2158   PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, these
2159   subcommunicators cannot overlap.
2160 
2161 .keywords: Petsc, initialize, package, PLAPACK
2162 .seealso: PetscInitializePackage(), PetscInitialize()
2163 @*/
2164 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2165 {
2166   PetscMPIInt    size;
2167   PetscErrorCode ierr;
2168 
2169   PetscFunctionBegin;
2170   if (!PLA_Initialized(PETSC_NULL)) {
2171 
2172     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2173     Plapack_nprows = 1;
2174     Plapack_npcols = size;
2175 
2176     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2177       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2178       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2179 #if defined(PETSC_USE_DEBUG)
2180       Plapack_ierror = 3;
2181 #else
2182       Plapack_ierror = 0;
2183 #endif
2184       ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2185       if (Plapack_ierror){
2186         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2187       } else {
2188         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2189       }
2190 
2191       Plapack_nb_alg = 0;
2192       ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2193       if (Plapack_nb_alg) {
2194         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2195       }
2196     PetscOptionsEnd();
2197 
2198     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2199     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2200     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2201   }
2202   PetscFunctionReturn(0);
2203 }
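/*
   Illustrative sketch: because of the constraints in the Notes above, an
   application that wants PLAPACK on a subcommunicator should create its
   first MPI dense matrix there; color is an assumed process coloring and
   error checking is abbreviated.

      MPI_Comm subcomm;
      Mat      A;
      ierr = MPI_Comm_split(MPI_COMM_WORLD,color,0,&subcomm);CHKERRQ(ierr);
      ierr = MatCreateMPIDense(subcomm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/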
2204 
2205 
2206 #endif
2207