xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision c4e433427e906e3f2dbd70553bb6d0901adfc1f1)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
19               matrix that represents the operator. For sequential matrices it returns itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap->N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,rstart,rend,nrows,ncols,nlrows,nlcols;
190   const PetscInt *irow,*icol;
191   PetscScalar    *av,*bv,*v = lmat->v;
192   Mat            newmat;
193 
194   PetscFunctionBegin;
195   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
196   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
198   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
199 
200   /* No parallel redistribution currently supported! Should really check each index set
201      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
202      original matrix! */
203 
204   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
205   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
206 
207   /* Check submatrix call */
208   if (scall == MAT_REUSE_MATRIX) {
209     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
210     /* Really need to test rows and column sizes! */
211     newmat = *B;
212   } else {
213     /* Create and fill new matrix */
214     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
215     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
216     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
217     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
218   }
219 
220   /* Now extract the data pointers and do the copy, column at a time */
221   newmatd = (Mat_MPIDense*)newmat->data;
222   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
223 
224   for (i=0; i<ncols; i++) {
225     av = v + nlrows*icol[i];
226     for (j=0; j<nrows; j++) {
227       *bv++ = av[irow[j] - rstart];
228     }
229   }
230 
231   /* Assemble the matrices so that the correct flags are set */
232   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234 
235   /* Free work space */
236   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
237   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
238   *B = newmat;
239   PetscFunctionReturn(0);
240 }
241 
242 #undef __FUNCT__
243 #define __FUNCT__ "MatRestoreArray_MPIDense"
244 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
245 {
246   PetscFunctionBegin;
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
252 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
253 {
254   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
255   MPI_Comm       comm = ((PetscObject)mat)->comm;
256   PetscErrorCode ierr;
257   PetscInt       nstash,reallocs;
258   InsertMode     addv;
259 
260   PetscFunctionBegin;
261   /* make sure all processors are either in INSERTMODE or ADDMODE */
262   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
263   if (addv == (ADD_VALUES|INSERT_VALUES)) {
264     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
265   }
266   mat->insertmode = addv; /* in case this processor had no cache */
267 
268   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
269   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
270   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
276 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
277 {
278   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
279   PetscErrorCode  ierr;
280   PetscInt        i,*row,*col,flg,j,rstart,ncols;
281   PetscMPIInt     n;
282   PetscScalar     *val;
283   InsertMode      addv=mat->insertmode;
284 
285   PetscFunctionBegin;
286   /*  wait on receives */
287   while (1) {
288     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
289     if (!flg) break;
290 
291     for (i=0; i<n;) {
292       /* Now identify the consecutive vals belonging to the same row */
293       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
294       if (j < n) ncols = j-i;
295       else       ncols = n-i;
296       /* Now assemble all these values with a single function call */
297       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
298       i = j;
299     }
300   }
301   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
302 
303   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
304   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
305 
306   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
307     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatZeroEntries_MPIDense"
314 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
315 {
316   PetscErrorCode ierr;
317   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
318 
319   PetscFunctionBegin;
320   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
321   PetscFunctionReturn(0);
322 }
323 
324 /* the code does not do the diagonal entries correctly unless the
325    matrix is square and the column and row owerships are identical.
326    This is a BUG. The only way to fix it seems to be to access
327    mdn->A and mdn->B directly and not through the MatZeroRows()
328    routine.
329 */
330 #undef __FUNCT__
331 #define __FUNCT__ "MatZeroRows_MPIDense"
332 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
333 {
334   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
335   PetscErrorCode ierr;
336   PetscInt       i,*owners = A->rmap->range;
337   PetscInt       *nprocs,j,idx,nsends;
338   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
339   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
340   PetscInt       *lens,*lrows,*values;
341   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
342   MPI_Comm       comm = ((PetscObject)A)->comm;
343   MPI_Request    *send_waits,*recv_waits;
344   MPI_Status     recv_status,*send_status;
345   PetscTruth     found;
346 
347   PetscFunctionBegin;
348   /*  first count number of contributors to each processor */
349   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
350   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
351   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
352   for (i=0; i<N; i++) {
353     idx = rows[i];
354     found = PETSC_FALSE;
355     for (j=0; j<size; j++) {
356       if (idx >= owners[j] && idx < owners[j+1]) {
357         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
358       }
359     }
360     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
361   }
362   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
363 
364   /* inform other processors of number of messages and max length*/
365   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
366 
367   /* post receives:   */
368   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
369   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
370   for (i=0; i<nrecvs; i++) {
371     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
372   }
373 
374   /* do sends:
375       1) starts[i] gives the starting index in svalues for stuff going to
376          the ith processor
377   */
378   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
379   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
380   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
381   starts[0]  = 0;
382   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
383   for (i=0; i<N; i++) {
384     svalues[starts[owner[i]]++] = rows[i];
385   }
386 
387   starts[0] = 0;
388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
389   count = 0;
390   for (i=0; i<size; i++) {
391     if (nprocs[2*i+1]) {
392       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
393     }
394   }
395   ierr = PetscFree(starts);CHKERRQ(ierr);
396 
397   base = owners[rank];
398 
399   /*  wait on receives */
400   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
401   source = lens + nrecvs;
402   count  = nrecvs; slen = 0;
403   while (count) {
404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
405     /* unpack receives into our local space */
406     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
407     source[imdex]  = recv_status.MPI_SOURCE;
408     lens[imdex]    = n;
409     slen += n;
410     count--;
411   }
412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
413 
414   /* move the data into the send scatter */
415   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
416   count = 0;
417   for (i=0; i<nrecvs; i++) {
418     values = rvalues + i*nmax;
419     for (j=0; j<lens[i]; j++) {
420       lrows[count++] = values[j] - base;
421     }
422   }
423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
424   ierr = PetscFree(lens);CHKERRQ(ierr);
425   ierr = PetscFree(owner);CHKERRQ(ierr);
426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
427 
428   /* actually zap the local rows */
429   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
430   ierr = PetscFree(lrows);CHKERRQ(ierr);
431 
432   /* wait on sends */
433   if (nsends) {
434     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
435     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
436     ierr = PetscFree(send_status);CHKERRQ(ierr);
437   }
438   ierr = PetscFree(send_waits);CHKERRQ(ierr);
439   ierr = PetscFree(svalues);CHKERRQ(ierr);
440 
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatMult_MPIDense"
446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
447 {
448   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
449   PetscErrorCode ierr;
450 
451   PetscFunctionBegin;
452   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
454   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatMultAdd_MPIDense"
460 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
461 {
462   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
468   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatMultTranspose_MPIDense"
474 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
475 {
476   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
477   PetscErrorCode ierr;
478   PetscScalar    zero = 0.0;
479 
480   PetscFunctionBegin;
481   ierr = VecSet(yy,zero);CHKERRQ(ierr);
482   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
483   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
490 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
491 {
492   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
493   PetscErrorCode ierr;
494 
495   PetscFunctionBegin;
496   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
497   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
498   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 #undef __FUNCT__
504 #define __FUNCT__ "MatGetDiagonal_MPIDense"
505 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
506 {
507   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
508   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
509   PetscErrorCode ierr;
510   PetscInt       len,i,n,m = A->rmap->n,radd;
511   PetscScalar    *x,zero = 0.0;
512 
513   PetscFunctionBegin;
514   ierr = VecSet(v,zero);CHKERRQ(ierr);
515   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
516   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
517   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
518   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
519   radd = A->rmap->rstart*m;
520   for (i=0; i<len; i++) {
521     x[i] = aloc->v[radd + i*m + i];
522   }
523   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatDestroy_MPIDense"
529 PetscErrorCode MatDestroy_MPIDense(Mat mat)
530 {
531   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
532   PetscErrorCode ierr;
533 #if defined(PETSC_HAVE_PLAPACK)
534   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
535 #endif
536 
537   PetscFunctionBegin;
538 
539 #if defined(PETSC_USE_LOG)
540   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
541 #endif
542   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
543   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
544   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
545   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
546 #if defined(PETSC_HAVE_PLAPACK)
547   if (lu) {
548     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
549     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
550     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
551 
552     if (lu->is_pla) {
553       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
554       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
555       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
556     }
557   }
558 #endif
559 
560   ierr = PetscFree(mdn);CHKERRQ(ierr);
561   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_MPIDense_Binary"
572 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
573 {
574   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
575   PetscErrorCode    ierr;
576   PetscViewerFormat format;
577   int               fd;
578   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
579   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
580   PetscScalar       *work,*v,*vv;
581   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
582   MPI_Status        status;
583 
584   PetscFunctionBegin;
585   if (mdn->size == 1) {
586     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
587   } else {
588     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
589     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
590     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
591 
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_NATIVE) {
594 
595       if (!rank) {
596         /* store the matrix as a dense matrix */
597         header[0] = MAT_FILE_COOKIE;
598         header[1] = mat->rmap->N;
599         header[2] = N;
600         header[3] = MATRIX_BINARY_FORMAT_DENSE;
601         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
602 
603         /* get largest work array needed for transposing array */
604         mmax = mat->rmap->n;
605         for (i=1; i<size; i++) {
606           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
607         }
608         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
609 
610         /* write out local array, by rows */
611         m    = mat->rmap->n;
612         v    = a->v;
613         for (j=0; j<N; j++) {
614           for (i=0; i<m; i++) {
615             work[j + i*N] = *v++;
616           }
617         }
618         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
619         /* get largest work array to receive messages from other processes, excludes process zero */
620         mmax = 0;
621         for (i=1; i<size; i++) {
622           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
623         }
624         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
625         for(k = 1; k < size; k++) {
626           v    = vv;
627           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
628           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
629 
630           for(j = 0; j < N; j++) {
631             for(i = 0; i < m; i++) {
632               work[j + i*N] = *v++;
633             }
634           }
635           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         }
637         ierr = PetscFree(work);CHKERRQ(ierr);
638         ierr = PetscFree(vv);CHKERRQ(ierr);
639       } else {
640         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
641       }
642     } else {
643       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE");
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
651 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
652 {
653   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
654   PetscErrorCode        ierr;
655   PetscMPIInt           size = mdn->size,rank = mdn->rank;
656   const PetscViewerType vtype;
657   PetscTruth            iascii,isdraw;
658   PetscViewer           sviewer;
659   PetscViewerFormat     format;
660 #if defined(PETSC_HAVE_PLAPACK)
661   Mat_Plapack           *lu;
662 #endif
663 
664   PetscFunctionBegin;
665   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
666   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
667   if (iascii) {
668     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
669     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
670     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
671       MatInfo info;
672       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
673       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
674                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
675       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
676 #if defined(PETSC_HAVE_PLAPACK)
677       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
680       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
681       if (mat->factor){
682         lu=(Mat_Plapack*)(mat->spptr);
683         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
684       }
685 #else
686       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
687 #endif
688       PetscFunctionReturn(0);
689     } else if (format == PETSC_VIEWER_ASCII_INFO) {
690       PetscFunctionReturn(0);
691     }
692   } else if (isdraw) {
693     PetscDraw  draw;
694     PetscTruth isnull;
695 
696     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
697     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
698     if (isnull) PetscFunctionReturn(0);
699   }
700 
701   if (size == 1) {
702     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
703   } else {
704     /* assemble the entire matrix onto first processor. */
705     Mat         A;
706     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
707     PetscInt    *cols;
708     PetscScalar *vals;
709 
710     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
713     } else {
714       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
715     }
716     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
717     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
718     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
719     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
720 
721     /* Copy the matrix ... This isn't the most efficient means,
722        but it's quick for now */
723     A->insertmode = INSERT_VALUES;
724     row = mat->rmap->rstart; m = mdn->A->rmap->n;
725     for (i=0; i<m; i++) {
726       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
727       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
728       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
729       row++;
730     }
731 
732     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
733     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
734     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
735     if (!rank) {
736       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
737     }
738     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
739     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
740     ierr = MatDestroy(A);CHKERRQ(ierr);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatView_MPIDense"
747 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
748 {
749   PetscErrorCode ierr;
750   PetscTruth     iascii,isbinary,isdraw,issocket;
751 
752   PetscFunctionBegin;
753 
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
755   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
756   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
757   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
758 
759   if (iascii || issocket || isdraw) {
760     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
761   } else if (isbinary) {
762     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
763   } else {
764     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
765   }
766   PetscFunctionReturn(0);
767 }
768 
769 #undef __FUNCT__
770 #define __FUNCT__ "MatGetInfo_MPIDense"
771 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
772 {
773   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
774   Mat            mdn = mat->A;
775   PetscErrorCode ierr;
776   PetscReal      isend[5],irecv[5];
777 
778   PetscFunctionBegin;
779   info->block_size     = 1.0;
780   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
781   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
782   isend[3] = info->memory;  isend[4] = info->mallocs;
783   if (flag == MAT_LOCAL) {
784     info->nz_used      = isend[0];
785     info->nz_allocated = isend[1];
786     info->nz_unneeded  = isend[2];
787     info->memory       = isend[3];
788     info->mallocs      = isend[4];
789   } else if (flag == MAT_GLOBAL_MAX) {
790     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
791     info->nz_used      = irecv[0];
792     info->nz_allocated = irecv[1];
793     info->nz_unneeded  = irecv[2];
794     info->memory       = irecv[3];
795     info->mallocs      = irecv[4];
796   } else if (flag == MAT_GLOBAL_SUM) {
797     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
798     info->nz_used      = irecv[0];
799     info->nz_allocated = irecv[1];
800     info->nz_unneeded  = irecv[2];
801     info->memory       = irecv[3];
802     info->mallocs      = irecv[4];
803   }
804   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
805   info->fill_ratio_needed = 0;
806   info->factor_mallocs    = 0;
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatSetOption_MPIDense"
812 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
813 {
814   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818   switch (op) {
819   case MAT_NEW_NONZERO_LOCATIONS:
820   case MAT_NEW_NONZERO_LOCATION_ERR:
821   case MAT_NEW_NONZERO_ALLOCATION_ERR:
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_ROW_ORIENTED:
825     a->roworiented = flg;
826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
827     break;
828   case MAT_NEW_DIAGONALS:
829   case MAT_USE_HASH_TABLE:
830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
831     break;
832   case MAT_IGNORE_OFF_PROC_ENTRIES:
833     a->donotstash = flg;
834     break;
835   case MAT_SYMMETRIC:
836   case MAT_STRUCTURALLY_SYMMETRIC:
837   case MAT_HERMITIAN:
838   case MAT_SYMMETRY_ETERNAL:
839   case MAT_IGNORE_LOWER_TRIANGULAR:
840     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
841     break;
842   default:
843     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
844   }
845   PetscFunctionReturn(0);
846 }
847 
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatDiagonalScale_MPIDense"
851 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
852 {
853   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
854   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
855   PetscScalar    *l,*r,x,*v;
856   PetscErrorCode ierr;
857   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
858 
859   PetscFunctionBegin;
860   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
861   if (ll) {
862     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
863     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
864     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
865     for (i=0; i<m; i++) {
866       x = l[i];
867       v = mat->v + i;
868       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
869     }
870     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
871     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
872   }
873   if (rr) {
874     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
875     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
876     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
879     for (i=0; i<n; i++) {
880       x = r[i];
881       v = mat->v + i*m;
882       for (j=0; j<m; j++) { (*v++) *= x;}
883     }
884     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
885     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
886   }
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatNorm_MPIDense"
892 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
893 {
894   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
895   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
896   PetscErrorCode ierr;
897   PetscInt       i,j;
898   PetscReal      sum = 0.0;
899   PetscScalar    *v = mat->v;
900 
901   PetscFunctionBegin;
902   if (mdn->size == 1) {
903     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
904   } else {
905     if (type == NORM_FROBENIUS) {
906       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
907 #if defined(PETSC_USE_COMPLEX)
908         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
909 #else
910         sum += (*v)*(*v); v++;
911 #endif
912       }
913       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
914       *nrm = sqrt(*nrm);
915       ierr = PetscLogFlops(2*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
916     } else if (type == NORM_1) {
917       PetscReal *tmp,*tmp2;
918       ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
919       tmp2 = tmp + A->cmap->N;
920       ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
921       *nrm = 0.0;
922       v = mat->v;
923       for (j=0; j<mdn->A->cmap->n; j++) {
924         for (i=0; i<mdn->A->rmap->n; i++) {
925           tmp[j] += PetscAbsScalar(*v);  v++;
926         }
927       }
928       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
929       for (j=0; j<A->cmap->N; j++) {
930         if (tmp2[j] > *nrm) *nrm = tmp2[j];
931       }
932       ierr = PetscFree(tmp);CHKERRQ(ierr);
933       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
934     } else if (type == NORM_INFINITY) { /* max row norm */
935       PetscReal ntemp;
936       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
937       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
938     } else {
939       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
940     }
941   }
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatTranspose_MPIDense"
947 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
948 {
949   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
950   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
951   Mat            B;
952   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
953   PetscErrorCode ierr;
954   PetscInt       j,i;
955   PetscScalar    *v;
956 
957   PetscFunctionBegin;
958   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
959   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
960     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
961     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
963     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
964   } else {
965     B = *matout;
966   }
967 
968   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
969   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
970   for (i=0; i<m; i++) rwork[i] = rstart + i;
971   for (j=0; j<n; j++) {
972     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
973     v   += m;
974   }
975   ierr = PetscFree(rwork);CHKERRQ(ierr);
976   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
979     *matout = B;
980   } else {
981     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 #include "petscblaslapack.h"
987 #undef __FUNCT__
988 #define __FUNCT__ "MatScale_MPIDense"
989 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
990 {
991   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
992   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
993   PetscScalar    oalpha = alpha;
994   PetscErrorCode ierr;
995   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);
996 
997   PetscFunctionBegin;
998   BLASscal_(&nz,&oalpha,a->v,&one);
999   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1007 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1008 {
1009   PetscErrorCode ierr;
1010 
1011   PetscFunctionBegin;
1012   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #if defined(PETSC_HAVE_PLAPACK)
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1020 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1021 {
1022   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1023   PetscErrorCode ierr;
1024   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1025   PetscScalar    *array;
1026   PetscReal      one = 1.0;
1027 
1028   PetscFunctionBegin;
1029   /* Copy A into F->lu->A */
1030   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1031   ierr = PLA_API_begin();CHKERRQ(ierr);
1032   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1034   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1035   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1036   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1037   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1038   ierr = PLA_API_end();CHKERRQ(ierr);
1039   lu->rstart = rstart;
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 #undef __FUNCT__
1044 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1045 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1046 {
1047   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1048   PetscErrorCode ierr;
1049   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1050   PetscScalar    *array;
1051   PetscReal      one = 1.0;
1052 
1053   PetscFunctionBegin;
1054   /* Copy F into A->lu->A */
1055   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1056   ierr = PLA_API_begin();CHKERRQ(ierr);
1057   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1058   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1059   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1060   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1061   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1062   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1063   ierr = PLA_API_end();CHKERRQ(ierr);
1064   lu->rstart = rstart;
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1070 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1071 {
1072   PetscErrorCode ierr;
1073   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1074   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1075   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1076   PLA_Obj        alpha = NULL,beta = NULL;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1080   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1081 
1082   /*
1083   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1084   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1085   */
1086 
1087   /* do the multiply in PLA  */
1088   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1089   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1090   CHKMEMQ;
1091 
1092   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1093   CHKMEMQ;
1094   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1095   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1096 
1097   /*
1098   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1099   */
1100   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1106 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1107 {
1108   PetscErrorCode ierr;
1109   PetscInt       m=A->rmap->n,n=B->cmap->n;
1110   Mat            Cmat;
1111 
1112   PetscFunctionBegin;
1113   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1114   SETERRQ(PETSC_ERR_LIB,"Due to aparent bugs in PLAPACK,this is not currently supported");
1115   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1116   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1117   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1118   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1119   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1120 
1121   *C = Cmat;
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1127 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1128 {
1129   PetscErrorCode ierr;
1130 
1131   PetscFunctionBegin;
1132   if (scall == MAT_INITIAL_MATRIX){
1133     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1134   }
1135   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSolve_MPIDense"
1141 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1142 {
1143   MPI_Comm       comm = ((PetscObject)A)->comm;
1144   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1145   PetscErrorCode ierr;
1146   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1147   PetscScalar    *array;
1148   PetscReal      one = 1.0;
1149   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;;
1150   PLA_Obj        v_pla = NULL;
1151   PetscScalar    *loc_buf;
1152   Vec            loc_x;
1153 
1154   PetscFunctionBegin;
1155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1157 
1158   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1159   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1160   PLA_Obj_set_to_zero(v_pla);
1161 
1162   /* Copy b into rhs_pla */
1163   PLA_API_begin();
1164   PLA_Obj_API_open(v_pla);
1165   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1166   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1167   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1168   PLA_Obj_API_close(v_pla);
1169   PLA_API_end();
1170 
1171   if (A->factor == MAT_FACTOR_LU){
1172     /* Apply the permutations to the right hand sides */
1173     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1174 
1175     /* Solve L y = b, overwriting b with y */
1176     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1177 
1178     /* Solve U x = y (=b), overwriting b with x */
1179     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1180   } else { /*MAT_FACTOR_CHOLESKY */
1181     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1182     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1183                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1184   }
1185 
1186   /* Copy PLAPACK x into Petsc vector x  */
1187   PLA_Obj_local_length(v_pla, &loc_m);
1188   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1189   PLA_Obj_local_stride(v_pla, &loc_stride);
1190   /*
1191     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1192   */
1193   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1194   if (!lu->pla_solved){
1195 
1196     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1197     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1198 
1199     /* Create IS and cts for VecScatterring */
1200     PLA_Obj_local_length(v_pla, &loc_m);
1201     PLA_Obj_local_stride(v_pla, &loc_stride);
1202     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1203     idx_petsc = idx_pla + loc_m;
1204 
1205     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1206     for (i=0; i<loc_m; i+=lu->nb){
1207       j = 0;
1208       while (j < lu->nb && i+j < loc_m){
1209         idx_petsc[i+j] = rstart + j; j++;
1210       }
1211       rstart += size*lu->nb;
1212     }
1213 
1214     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1215 
1216     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1217     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1218     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1219     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1220   }
1221   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224   /* Free data */
1225   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1226   PLA_Obj_free(&v_pla);
1227 
1228   lu->pla_solved = PETSC_TRUE;
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1234 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1235 {
1236   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1237   PetscErrorCode ierr;
1238   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1239   PetscInt       info_pla=0;
1240   PetscScalar    *array,one = 1.0;
1241 
1242   PetscFunctionBegin;
1243   if (lu->mstruct == SAME_NONZERO_PATTERN){
1244     PLA_Obj_free(&lu->A);
1245     PLA_Obj_free (&lu->pivots);
1246   }
1247   /* Create PLAPACK matrix object */
1248   lu->A = NULL; lu->pivots = NULL;
1249   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1250   PLA_Obj_set_to_zero(lu->A);
1251   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1252 
1253   /* Copy A into lu->A */
1254   PLA_API_begin();
1255   PLA_Obj_API_open(lu->A);
1256   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1257   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1258   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1259   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1260   PLA_Obj_API_close(lu->A);
1261   PLA_API_end();
1262 
1263   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1264   info_pla = PLA_LU(lu->A,lu->pivots);
1265   if (info_pla != 0)
1266     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1267 
1268   lu->rstart         = rstart;
1269   lu->mstruct        = SAME_NONZERO_PATTERN;
1270   F->ops->solve      = MatSolve_MPIDense;
1271   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1277 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1278 {
1279   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1280   PetscErrorCode ierr;
1281   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1282   PetscInt       info_pla=0;
1283   PetscScalar    *array,one = 1.0;
1284 
1285   PetscFunctionBegin;
1286   if (lu->mstruct == SAME_NONZERO_PATTERN){
1287     PLA_Obj_free(&lu->A);
1288   }
1289   /* Create PLAPACK matrix object */
1290   lu->A      = NULL;
1291   lu->pivots = NULL;
1292   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1293 
1294   /* Copy A into lu->A */
1295   PLA_API_begin();
1296   PLA_Obj_API_open(lu->A);
1297   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1298   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1299   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1300   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1301   PLA_Obj_API_close(lu->A);
1302   PLA_API_end();
1303 
1304   /* Factor P A -> Chol */
1305   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1306   if (info_pla != 0)
1307     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1308 
1309   lu->rstart         = rstart;
1310   lu->mstruct        = SAME_NONZERO_PATTERN;
1311   F->ops->solve      = MatSolve_MPIDense;
1312   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 /* Note the Petsc perm permutation is ignored */
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1319 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1320 {
1321   PetscErrorCode ierr;
1322   PetscTruth     issymmetric,set;
1323 
1324   PetscFunctionBegin;
1325   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1326   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1327   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 /* Note the Petsc r and c permutations are ignored */
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1334 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1335 {
1336   PetscErrorCode ierr;
1337   PetscInt       M = A->rmap->N;
1338   Mat_Plapack    *lu;
1339 
1340   PetscFunctionBegin;
1341   lu = (Mat_Plapack*)F->spptr;
1342   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1343   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1349 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1350 {
1351   PetscErrorCode ierr;
1352   Mat_Plapack    *lu;
1353   PetscMPIInt    size;
1354   PetscInt       M=A->rmap->N;
1355 
1356   PetscFunctionBegin;
1357   /* Create the factorization matrix */
1358   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1359   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1360   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1361   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1362   (*F)->spptr = (void*)lu;
1363 
1364   /* Set default Plapack parameters */
1365   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1366   lu->nb = M/size;
1367   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1368 
1369   /* Set runtime options */
1370   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1371     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1372   PetscOptionsEnd();
1373 
1374   /* Create object distribution template */
1375   lu->templ = NULL;
1376   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1377 
1378   /* Set the datatype */
1379 #if defined(PETSC_USE_COMPLEX)
1380   lu->datatype = MPI_DOUBLE_COMPLEX;
1381 #else
1382   lu->datatype = MPI_DOUBLE;
1383 #endif
1384 
1385   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1386 
1387 
1388   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() is called yet */
1389   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1390 
1391   if (ftype == MAT_FACTOR_LU) {
1392     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1393   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1394     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1395   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1396   (*F)->factor = ftype;
1397   PetscFunctionReturn(0);
1398 }
1399 #endif
1400 
1401 /* -------------------------------------------------------------------*/
1402 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1403        MatGetRow_MPIDense,
1404        MatRestoreRow_MPIDense,
1405        MatMult_MPIDense,
1406 /* 4*/ MatMultAdd_MPIDense,
1407        MatMultTranspose_MPIDense,
1408        MatMultTransposeAdd_MPIDense,
1409        0,
1410        0,
1411        0,
1412 /*10*/ 0,
1413        0,
1414        0,
1415        0,
1416        MatTranspose_MPIDense,
1417 /*15*/ MatGetInfo_MPIDense,
1418        MatEqual_MPIDense,
1419        MatGetDiagonal_MPIDense,
1420        MatDiagonalScale_MPIDense,
1421        MatNorm_MPIDense,
1422 /*20*/ MatAssemblyBegin_MPIDense,
1423        MatAssemblyEnd_MPIDense,
1424        0,
1425        MatSetOption_MPIDense,
1426        MatZeroEntries_MPIDense,
1427 /*25*/ MatZeroRows_MPIDense,
1428        0,
1429        0,
1430        0,
1431        0,
1432 /*30*/ MatSetUpPreallocation_MPIDense,
1433        0,
1434        0,
1435        MatGetArray_MPIDense,
1436        MatRestoreArray_MPIDense,
1437 /*35*/ MatDuplicate_MPIDense,
1438        0,
1439        0,
1440        0,
1441        0,
1442 /*40*/ 0,
1443        MatGetSubMatrices_MPIDense,
1444        0,
1445        MatGetValues_MPIDense,
1446        0,
1447 /*45*/ 0,
1448        MatScale_MPIDense,
1449        0,
1450        0,
1451        0,
1452 /*50*/ 0,
1453        0,
1454        0,
1455        0,
1456        0,
1457 /*55*/ 0,
1458        0,
1459        0,
1460        0,
1461        0,
1462 /*60*/ MatGetSubMatrix_MPIDense,
1463        MatDestroy_MPIDense,
1464        MatView_MPIDense,
1465        0,
1466        0,
1467 /*65*/ 0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*70*/ 0,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*75*/ 0,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*80*/ 0,
1483        0,
1484        0,
1485        0,
1486 /*84*/ MatLoad_MPIDense,
1487        0,
1488        0,
1489        0,
1490        0,
1491        0,
1492 /*90*/
1493 #if defined(PETSC_HAVE_PLAPACK)
1494        MatMatMult_MPIDense_MPIDense,
1495        MatMatMultSymbolic_MPIDense_MPIDense,
1496        MatMatMultNumeric_MPIDense_MPIDense,
1497 #else
1498        0,
1499        0,
1500        0,
1501 #endif
1502        0,
1503 /*95*/ 0,
1504        0,
1505        0,
1506        0};
1507 
1508 EXTERN_C_BEGIN
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1511 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1512 {
1513   Mat_MPIDense   *a;
1514   PetscErrorCode ierr;
1515 
1516   PetscFunctionBegin;
1517   mat->preallocated = PETSC_TRUE;
1518   /* Note:  For now, when data is specified above, this assumes the user correctly
1519    allocates the local dense storage space.  We should add error checking. */
1520 
1521   a    = (Mat_MPIDense*)mat->data;
1522   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1523   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1524   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1525   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1526   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 EXTERN_C_END
1530 
1531 /*MC
1532    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1533 
1534    Options Database Keys:
1535 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1536 
1537   Level: beginner
1538 
1539   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR)
1540   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
1541   (run config/configure.py with the option --download-plapack)
1542 
1543 
1544   Options Database Keys:
1545 . -mat_plapack_nprows <n> - number of rows in processor partition
1546 . -mat_plapack_npcols <n> - number of columns in processor partition
1547 . -mat_plapack_nb <n> - block size of template vector
1548 . -mat_plapack_nb_alg <n> - algorithmic block size
1549 - -mat_plapack_ckerror <n> - error checking flag
1550 
1551 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1552 M*/
1553 
1554 EXTERN_C_BEGIN
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatCreate_MPIDense"
1557 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1558 {
1559   Mat_MPIDense   *a;
1560   PetscErrorCode ierr;
1561 
1562   PetscFunctionBegin;
1563   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1564   mat->data         = (void*)a;
1565   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1566   mat->mapping      = 0;
1567 
1568   mat->insertmode = NOT_SET_VALUES;
1569   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1570   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1571 
1572   mat->rmap->bs = mat->cmap->bs = 1;
1573   ierr = PetscMapSetUp(mat->rmap);CHKERRQ(ierr);
1574   ierr = PetscMapSetUp(mat->cmap);CHKERRQ(ierr);
1575   a->nvec = mat->cmap->n;
1576 
1577   /* build cache for off array entries formed */
1578   a->donotstash = PETSC_FALSE;
1579   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1580 
1581   /* stuff used for matrix vector multiply */
1582   a->lvec        = 0;
1583   a->Mvctx       = 0;
1584   a->roworiented = PETSC_TRUE;
1585 
1586   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1587                                      "MatGetDiagonalBlock_MPIDense",
1588                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1590                                      "MatMPIDenseSetPreallocation_MPIDense",
1591                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1592   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1593                                      "MatMatMult_MPIAIJ_MPIDense",
1594                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1595   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1596                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1597                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1598   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1599                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1600                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1601 #if defined(PETSC_HAVE_PLAPACK)
1602   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_mpidense_petsc_C",
1603                                      "MatGetFactor_mpidense_petsc",
1604                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1605 #if defined(PETSC_HAVE_PLAPACK)
1606   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1607 #endif
1608 
1609 #endif
1610   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1611 
1612   PetscFunctionReturn(0);
1613 }
1614 EXTERN_C_END
1615 
1616 /*MC
1617    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1618 
1619    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1620    and MATMPIDENSE otherwise.
1621 
1622    Options Database Keys:
1623 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1624 
1625   Level: beginner
1626 
1627 
1628 .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1629 M*/
1630 
1631 EXTERN_C_BEGIN
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatCreate_Dense"
1634 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1635 {
1636   PetscErrorCode ierr;
1637   PetscMPIInt    size;
1638 
1639   PetscFunctionBegin;
1640   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1641   if (size == 1) {
1642     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1643   } else {
1644     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1645   }
1646   PetscFunctionReturn(0);
1647 }
1648 EXTERN_C_END
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1652 /*@C
1653    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1654 
1655    Not collective
1656 
1657    Input Parameters:
1658 .  A - the matrix
1659 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1660    to control all matrix memory allocation.
1661 
1662    Notes:
1663    The dense format is fully compatible with standard Fortran 77
1664    storage by columns.
1665 
1666    The data input variable is intended primarily for Fortran programmers
1667    who wish to allocate their own matrix memory space.  Most users should
1668    set data=PETSC_NULL.
1669 
1670    Level: intermediate
1671 
1672 .keywords: matrix,dense, parallel
1673 
1674 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1675 @*/
1676 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1677 {
1678   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1679 
1680   PetscFunctionBegin;
1681   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1682   if (f) {
1683     ierr = (*f)(mat,data);CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatCreateMPIDense"
1690 /*@C
1691    MatCreateMPIDense - Creates a sparse parallel matrix in dense format.
1692 
1693    Collective on MPI_Comm
1694 
1695    Input Parameters:
1696 +  comm - MPI communicator
1697 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1698 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1699 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1700 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1701 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1702    to control all matrix memory allocation.
1703 
1704    Output Parameter:
1705 .  A - the matrix
1706 
1707    Notes:
1708    The dense format is fully compatible with standard Fortran 77
1709    storage by columns.
1710 
1711    The data input variable is intended primarily for Fortran programmers
1712    who wish to allocate their own matrix memory space.  Most users should
1713    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1714 
1715    The user MUST specify either the local or global matrix dimensions
1716    (possibly both).
1717 
1718    Level: intermediate
1719 
1720 .keywords: matrix,dense, parallel
1721 
1722 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1723 @*/
1724 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1725 {
1726   PetscErrorCode ierr;
1727   PetscMPIInt    size;
1728 
1729   PetscFunctionBegin;
1730   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1731   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1732   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1733   if (size > 1) {
1734     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1735     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1736   } else {
1737     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1738     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1739   }
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatDuplicate_MPIDense"
1745 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1746 {
1747   Mat            mat;
1748   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   *newmat       = 0;
1753   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1754   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1755   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1756   a                 = (Mat_MPIDense*)mat->data;
1757   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1758   mat->factor       = A->factor;
1759   mat->assembled    = PETSC_TRUE;
1760   mat->preallocated = PETSC_TRUE;
1761 
1762   mat->rmap->rstart     = A->rmap->rstart;
1763   mat->rmap->rend       = A->rmap->rend;
1764   a->size              = oldmat->size;
1765   a->rank              = oldmat->rank;
1766   mat->insertmode      = NOT_SET_VALUES;
1767   a->nvec              = oldmat->nvec;
1768   a->donotstash        = oldmat->donotstash;
1769 
1770   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1771   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1772   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1773 
1774   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1775   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1776   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1777 
1778   *newmat = mat;
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #include "petscsys.h"
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1786 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1787 {
1788   PetscErrorCode ierr;
1789   PetscMPIInt    rank,size;
1790   PetscInt       *rowners,i,m,nz,j;
1791   PetscScalar    *array,*vals,*vals_ptr;
1792   MPI_Status     status;
1793 
1794   PetscFunctionBegin;
1795   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1796   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1797 
1798   /* determine ownership of all rows */
1799   m          = M/size + ((M % size) > rank);
1800   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1801   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1802   rowners[0] = 0;
1803   for (i=2; i<=size; i++) {
1804     rowners[i] += rowners[i-1];
1805   }
1806 
1807   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1808   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1809   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1810   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1811   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1812 
1813   if (!rank) {
1814     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1815 
1816     /* read in my part of the matrix numerical values  */
1817     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1818 
1819     /* insert into matrix-by row (this is why cannot directly read into array */
1820     vals_ptr = vals;
1821     for (i=0; i<m; i++) {
1822       for (j=0; j<N; j++) {
1823         array[i + j*m] = *vals_ptr++;
1824       }
1825     }
1826 
1827     /* read in other processors and ship out */
1828     for (i=1; i<size; i++) {
1829       nz   = (rowners[i+1] - rowners[i])*N;
1830       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1831       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1832     }
1833   } else {
1834     /* receive numeric values */
1835     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1836 
1837     /* receive message of values*/
1838     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1839 
1840     /* insert into matrix-by row (this is why cannot directly read into array */
1841     vals_ptr = vals;
1842     for (i=0; i<m; i++) {
1843       for (j=0; j<N; j++) {
1844         array[i + j*m] = *vals_ptr++;
1845       }
1846     }
1847   }
1848   ierr = PetscFree(rowners);CHKERRQ(ierr);
1849   ierr = PetscFree(vals);CHKERRQ(ierr);
1850   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1851   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatLoad_MPIDense"
1857 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1858 {
1859   Mat            A;
1860   PetscScalar    *vals,*svals;
1861   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1862   MPI_Status     status;
1863   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1864   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1865   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1866   PetscInt       i,nz,j,rstart,rend;
1867   int            fd;
1868   PetscErrorCode ierr;
1869 
1870   PetscFunctionBegin;
1871   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1872   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1873   if (!rank) {
1874     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1875     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1876     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1877   }
1878 
1879   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1880   M = header[1]; N = header[2]; nz = header[3];
1881 
1882   /*
1883        Handle case where matrix is stored on disk as a dense matrix
1884   */
1885   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1886     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1887     PetscFunctionReturn(0);
1888   }
1889 
1890   /* determine ownership of all rows */
1891   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1892   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1893   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1894   rowners[0] = 0;
1895   for (i=2; i<=size; i++) {
1896     rowners[i] += rowners[i-1];
1897   }
1898   rstart = rowners[rank];
1899   rend   = rowners[rank+1];
1900 
1901   /* distribute row lengths to all processors */
1902   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1903   offlens = ourlens + (rend-rstart);
1904   if (!rank) {
1905     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1906     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1907     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1908     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1909     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1910     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1911   } else {
1912     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1913   }
1914 
1915   if (!rank) {
1916     /* calculate the number of nonzeros on each processor */
1917     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1918     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1919     for (i=0; i<size; i++) {
1920       for (j=rowners[i]; j< rowners[i+1]; j++) {
1921         procsnz[i] += rowlengths[j];
1922       }
1923     }
1924     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1925 
1926     /* determine max buffer needed and allocate it */
1927     maxnz = 0;
1928     for (i=0; i<size; i++) {
1929       maxnz = PetscMax(maxnz,procsnz[i]);
1930     }
1931     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1932 
1933     /* read in my part of the matrix column indices  */
1934     nz = procsnz[0];
1935     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1936     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1937 
1938     /* read in every one elses and ship off */
1939     for (i=1; i<size; i++) {
1940       nz   = procsnz[i];
1941       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1942       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1943     }
1944     ierr = PetscFree(cols);CHKERRQ(ierr);
1945   } else {
1946     /* determine buffer space needed for message */
1947     nz = 0;
1948     for (i=0; i<m; i++) {
1949       nz += ourlens[i];
1950     }
1951     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1952 
1953     /* receive message of column indices*/
1954     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1955     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1956     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1957   }
1958 
1959   /* loop over local rows, determining number of off diagonal entries */
1960   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1961   jj = 0;
1962   for (i=0; i<m; i++) {
1963     for (j=0; j<ourlens[i]; j++) {
1964       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1965       jj++;
1966     }
1967   }
1968 
1969   /* create our matrix */
1970   for (i=0; i<m; i++) {
1971     ourlens[i] -= offlens[i];
1972   }
1973   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1974   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1975   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1976   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1977   A = *newmat;
1978   for (i=0; i<m; i++) {
1979     ourlens[i] += offlens[i];
1980   }
1981 
1982   if (!rank) {
1983     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1984 
1985     /* read in my part of the matrix numerical values  */
1986     nz = procsnz[0];
1987     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1988 
1989     /* insert into matrix */
1990     jj      = rstart;
1991     smycols = mycols;
1992     svals   = vals;
1993     for (i=0; i<m; i++) {
1994       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1995       smycols += ourlens[i];
1996       svals   += ourlens[i];
1997       jj++;
1998     }
1999 
2000     /* read in other processors and ship out */
2001     for (i=1; i<size; i++) {
2002       nz   = procsnz[i];
2003       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2004       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2005     }
2006     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2007   } else {
2008     /* receive numeric values */
2009     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2010 
2011     /* receive message of values*/
2012     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2013     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2014     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2015 
2016     /* insert into matrix */
2017     jj      = rstart;
2018     smycols = mycols;
2019     svals   = vals;
2020     for (i=0; i<m; i++) {
2021       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2022       smycols += ourlens[i];
2023       svals   += ourlens[i];
2024       jj++;
2025     }
2026   }
2027   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2028   ierr = PetscFree(vals);CHKERRQ(ierr);
2029   ierr = PetscFree(mycols);CHKERRQ(ierr);
2030   ierr = PetscFree(rowners);CHKERRQ(ierr);
2031 
2032   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatEqual_MPIDense"
2039 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2040 {
2041   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2042   Mat            a,b;
2043   PetscTruth     flg;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047   a = matA->A;
2048   b = matB->A;
2049   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2050   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 #if defined(PETSC_HAVE_PLAPACK)
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2058 /*@C
2059   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2060   Level: developer
2061 
2062 .keywords: Petsc, destroy, package, PLAPACK
2063 .seealso: PetscFinalize()
2064 @*/
2065 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2066 {
2067   PetscErrorCode ierr;
2068 
2069   PetscFunctionBegin;
2070   ierr = PLA_Finalize();CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2076 /*@C
2077   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2078   called from MatCreate_MPIDense() the first time an MPI dense matrix is called.
2079 
2080   Input Parameter:
2081 .   comm - the communicator the matrix lives on
2082 
2083   Level: developer
2084 
2085    Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
2086   same communicator (because there is some global state that is initialized and used for all matrices). In addition if
2087   PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators
2088   cannot overlap.
2089 
2090 .keywords: Petsc, initialize, package, PLAPACK
2091 .seealso: PetscInitializePackage(), PetscInitialize()
2092 @*/
2093 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2094 {
2095   PetscMPIInt    size;
2096   PetscErrorCode ierr;
2097 
2098   PetscFunctionBegin;
2099   if (!PLA_Initialized(PETSC_NULL)) {
2100 
2101     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2102     Plapack_nprows = 1;
2103     Plapack_npcols = size;
2104 
2105     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2106       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2107       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2108 #if defined(PETSC_USE_DEBUG)
2109       Plapack_ierror = 3;
2110 #else
2111       Plapack_ierror = 0;
2112 #endif
2113       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2114       if (Plapack_ierror){
2115 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2116       } else {
2117 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2118       }
2119 
2120       Plapack_nb_alg = 0;
2121       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2122       if (Plapack_nb_alg) {
2123         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2124       }
2125     PetscOptionsEnd();
2126 
2127     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2128     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2129     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2130   }
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #endif
2135