xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 704ba839cd484dc2900d48fe4907f851bfd76db1)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
19               matrix that stores the local part of the operator; a MATSEQDENSE matrix is returned as itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
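/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (illustrative only, not
   compiled as part of this file); the sizes m,n,M,N are placeholders:

     Mat A,Aloc;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,m,n,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);

   after which Aloc is the MATSEQDENSE matrix holding this process's local rows of A.
*/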
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows may be accessed");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
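/*
   A usage sketch for the row access pair above (illustrative only); only rows
   in [rstart,rend) may be requested, as enforced in MatGetRow_MPIDense():

     PetscInt          rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetRow(A,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(A,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
*/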
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse == MAT_REUSE_MATRIX) { ierr = MatDestroy(*B);CHKERRQ(ierr); }
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
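/*
   A sketch of how entries reach the code above (illustrative only): locally
   owned rows go straight into A->A, while off-process rows are stashed and
   communicated during assembly:

     PetscInt    i = 0,j = 0;      // placeholder global indices, possibly owned elsewhere
     PetscScalar v = 1.0;
     ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/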
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap.N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
190   PetscScalar    *av,*bv,*v = lmat->v;
191   Mat            newmat;
192 
193   PetscFunctionBegin;
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198 
199   /* No parallel redistribution is currently supported! Should really check each index set
200      to confirm that it is OK. Currently only submatrices with the same row partitioning as
201      the original matrix are supported! */
202 
203   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
204   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
205 
206   /* Check submatrix call */
207   if (scall == MAT_REUSE_MATRIX) {
208     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
209     /* Really need to check the row and column sizes! */
210     newmat = *B;
211   } else {
212     /* Create and fill new matrix */
213     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
214     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
215     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
216     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
217   }
218 
219   /* Now extract the data pointers and do the copy, column at a time */
220   newmatd = (Mat_MPIDense*)newmat->data;
221   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
222 
223   for (i=0; i<ncols; i++) {
224     av = v + nlrows*icol[i];
225     for (j=0; j<nrows; j++) {
226       *bv++ = av[irow[j] - rstart];
227     }
228   }
229 
230   /* Assemble the matrices so that the correct flags are set */
231   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233 
234   /* Free work space */
235   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
236   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
237   *B = newmat;
238   PetscFunctionReturn(0);
239 }
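/*
   A usage sketch matching the restriction noted above (illustrative only):
   the row index set must select exactly the locally owned rows; rstart, rend,
   and N are placeholders here:

     IS  isrow,iscol;
     Mat B;
     ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);   // all N columns
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/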
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatRestoreArray_MPIDense"
243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
244 {
245   PetscFunctionBegin;
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
252 {
253   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
254   MPI_Comm       comm = ((PetscObject)mat)->comm;
255   PetscErrorCode ierr;
256   PetscInt       nstash,reallocs;
257   InsertMode     addv;
258 
259   PetscFunctionBegin;
260   /* make sure all processors are either in INSERTMODE or ADDMODE */
261   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
262   if (addv == (ADD_VALUES|INSERT_VALUES)) {
263     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
264   }
265   mat->insertmode = addv; /* in case this processor had no cache */
266 
267   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
268   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
269   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
276 {
277   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
278   PetscErrorCode  ierr;
279   PetscInt        i,*row,*col,flg,j,rstart,ncols;
280   PetscMPIInt     n;
281   PetscScalar     *val;
282   InsertMode      addv=mat->insertmode;
283 
284   PetscFunctionBegin;
285   /*  wait on receives */
286   while (1) {
287     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
288     if (!flg) break;
289 
290     for (i=0; i<n;) {
291       /* Now identify the consecutive vals belonging to the same row */
292       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
293       if (j < n) ncols = j-i;
294       else       ncols = n-i;
295       /* Now assemble all these values with a single function call */
296       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
297       i = j;
298     }
299   }
300   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
301 
302   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
304 
305   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
306     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /* the code does not do the diagonal entries correctly unless the
324    matrix is square and the column and row ownerships are identical.
325    This is a BUG. The only way to fix it seems to be to access
326    mdn->A and mdn->B directly and not through the MatZeroRows()
327    routine.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroRows_MPIDense"
331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
332 {
333   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
334   PetscErrorCode ierr;
335   PetscInt       i,*owners = A->rmap.range;
336   PetscInt       *nprocs,j,idx,nsends;
337   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
338   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
339   PetscInt       *lens,*lrows,*values;
340   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
341   MPI_Comm       comm = ((PetscObject)A)->comm;
342   MPI_Request    *send_waits,*recv_waits;
343   MPI_Status     recv_status,*send_status;
344   PetscTruth     found;
345 
346   PetscFunctionBegin;
347   /*  first count number of contributors to each processor */
348   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
349   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* +1 so the malloc succeeds when N = 0 */
351   for (i=0; i<N; i++) {
352     idx = rows[i];
353     found = PETSC_FALSE;
354     for (j=0; j<size; j++) {
355       if (idx >= owners[j] && idx < owners[j+1]) {
356         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
357       }
358     }
359     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
360   }
361   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
362 
363   /* inform other processors of number of messages and max length*/
364   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
365 
366   /* post receives:   */
367   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
368   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
369   for (i=0; i<nrecvs; i++) {
370     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
371   }
372 
373   /* do sends:
374       1) starts[i] gives the starting index in svalues for stuff going to
375          the ith processor
376   */
377   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
378   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
379   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
380   starts[0]  = 0;
381   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
382   for (i=0; i<N; i++) {
383     svalues[starts[owner[i]]++] = rows[i];
384   }
385 
386   starts[0] = 0;
387   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
388   count = 0;
389   for (i=0; i<size; i++) {
390     if (nprocs[2*i+1]) {
391       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
392     }
393   }
394   ierr = PetscFree(starts);CHKERRQ(ierr);
395 
396   base = owners[rank];
397 
398   /*  wait on receives */
399   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
400   source = lens + nrecvs;
401   count  = nrecvs; slen = 0;
402   while (count) {
403     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
404     /* unpack receives into our local space */
405     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
406     source[imdex]  = recv_status.MPI_SOURCE;
407     lens[imdex]    = n;
408     slen += n;
409     count--;
410   }
411   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
412 
413   /* move the data into the send scatter */
414   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
415   count = 0;
416   for (i=0; i<nrecvs; i++) {
417     values = rvalues + i*nmax;
418     for (j=0; j<lens[i]; j++) {
419       lrows[count++] = values[j] - base;
420     }
421   }
422   ierr = PetscFree(rvalues);CHKERRQ(ierr);
423   ierr = PetscFree(lens);CHKERRQ(ierr);
424   ierr = PetscFree(owner);CHKERRQ(ierr);
425   ierr = PetscFree(nprocs);CHKERRQ(ierr);
426 
427   /* actually zap the local rows */
428   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
429   ierr = PetscFree(lrows);CHKERRQ(ierr);
430 
431   /* wait on sends */
432   if (nsends) {
433     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
434     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
435     ierr = PetscFree(send_status);CHKERRQ(ierr);
436   }
437   ierr = PetscFree(send_waits);CHKERRQ(ierr);
438   ierr = PetscFree(svalues);CHKERRQ(ierr);
439 
440   PetscFunctionReturn(0);
441 }
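/*
   A usage sketch for the routine above (illustrative only): any process may
   list any global row; the communication above delivers each row number to
   its owner:

     PetscInt rows[2] = {0,5};     // placeholder global row numbers
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);   // 1.0 is placed on the diagonal
*/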
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatMult_MPIDense"
445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
446 {
447   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
452   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
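/*
   A usage sketch for MatMult() with conforming layouts (illustrative only):
   x must match the column layout of the matrix and y the row layout:

     Vec      x,y;
     PetscInt m,n;
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,n,PETSC_DETERMINE,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DETERMINE,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/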
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatMultAdd_MPIDense"
459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatMultTranspose_MPIDense"
473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
474 {
475   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
476   PetscErrorCode ierr;
477   PetscScalar    zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(yy,zero);CHKERRQ(ierr);
481   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
482   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
483   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
492   PetscErrorCode ierr;
493 
494   PetscFunctionBegin;
495   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
496   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
497   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
498   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatGetDiagonal_MPIDense"
504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
505 {
506   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
507   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
508   PetscErrorCode ierr;
509   PetscInt       len,i,n,m = A->rmap.n,radd;
510   PetscScalar    *x,zero = 0.0;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(v,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
515   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
516   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
517   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
518   radd = A->rmap.rstart*m;
519   for (i=0; i<len; i++) {
520     x[i] = aloc->v[radd + i*m + i];
521   }
522   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatDestroy_MPIDense"
528 PetscErrorCode MatDestroy_MPIDense(Mat mat)
529 {
530   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
531   PetscErrorCode ierr;
532 #if defined(PETSC_HAVE_PLAPACK)
533   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
534 #endif
535 
536   PetscFunctionBegin;
537 
538 #if defined(PETSC_USE_LOG)
539   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
540 #endif
541   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
542   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
543   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
544   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
545 #if defined(PETSC_HAVE_PLAPACK)
546   if (lu) {
547     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
548     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
549     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
550 
551     if (lu->is_pla) {
552       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
553       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
554       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
555     }
556   }
557 #endif
558 
559   ierr = PetscFree(mdn);CHKERRQ(ierr);
560   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatView_MPIDense_Binary"
571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
572 {
573   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
574   PetscErrorCode    ierr;
575   PetscViewerFormat format;
576   int               fd;
577   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
578   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
579   PetscScalar       *work,*v,*vv;
580   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
581   MPI_Status        status;
582 
583   PetscFunctionBegin;
584   if (mdn->size == 1) {
585     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
586   } else {
587     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
588     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
589     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
590 
591     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
592     if (format == PETSC_VIEWER_BINARY_NATIVE) {
593 
594       if (!rank) {
595 	/* store the matrix as a dense matrix */
596 	header[0] = MAT_FILE_COOKIE;
597 	header[1] = mat->rmap.N;
598 	header[2] = N;
599 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
600 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
601 
602 	/* get largest work array needed for transposing array */
603         mmax = mat->rmap.n;
604         for (i=1; i<size; i++) {
605           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
606         }
607 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
608 
609 	/* write out local array, by rows */
610         m    = mat->rmap.n;
611 	v    = a->v;
612         for (j=0; j<N; j++) {
613           for (i=0; i<m; i++) {
614 	    work[j + i*N] = *v++;
615 	  }
616 	}
617 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
618         /* get largest work array to receive messages from other processes, excludes process zero */
619         mmax = 0;
620         for (i=1; i<size; i++) {
621           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
622         }
623 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
624         for (k=1; k<size; k++) {
625           v    = vv;  /* reset to the front of the buffer; the unpacking loop below advances v */
626           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
627           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
628 
629           for (j=0; j<N; j++) {
630             for (i=0; i<m; i++) {
631               work[j + i*N] = *v++;
632             }
633           }
634           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
635         }
636         ierr = PetscFree(work);CHKERRQ(ierr);
637         ierr = PetscFree(vv);CHKERRQ(ierr);
638       } else {
639         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
640       }
641     }
642   }
643   PetscFunctionReturn(0);
644 }
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
648 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
649 {
650   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
651   PetscErrorCode    ierr;
652   PetscMPIInt       size = mdn->size,rank = mdn->rank;
653   PetscViewerType   vtype;
654   PetscTruth        iascii,isdraw;
655   PetscViewer       sviewer;
656   PetscViewerFormat format;
657 #if defined(PETSC_HAVE_PLAPACK)
658   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
659 #endif
660 
661   PetscFunctionBegin;
662   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
663   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
664   if (iascii) {
665     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
666     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
667     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
668       MatInfo info;
669       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
670       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
671                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
672       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
673       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
674 #if defined(PETSC_HAVE_PLAPACK)
675       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
676       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
677       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
680 #endif
681       PetscFunctionReturn(0);
682     } else if (format == PETSC_VIEWER_ASCII_INFO) {
683       PetscFunctionReturn(0);
684     }
685   } else if (isdraw) {
686     PetscDraw  draw;
687     PetscTruth isnull;
688 
689     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
690     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
691     if (isnull) PetscFunctionReturn(0);
692   }
693 
694   if (size == 1) {
695     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
696   } else {
697     /* assemble the entire matrix onto first processor. */
698     Mat         A;
699     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
700     PetscInt    *cols;
701     PetscScalar *vals;
702 
703     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
704     if (!rank) {
705       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
706     } else {
707       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
708     }
709     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)mat)->type_name is probably acceptable. */
710     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
711     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
712     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
713 
714     /* Copy the matrix ... This isn't the most efficient means,
715        but it's quick for now */
716     A->insertmode = INSERT_VALUES;
717     row = mat->rmap.rstart; m = mdn->A->rmap.n;
718     for (i=0; i<m; i++) {
719       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
720       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
721       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
722       row++;
723     }
724 
725     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
727     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
728     if (!rank) {
729       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
730     }
731     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
733     ierr = MatDestroy(A);CHKERRQ(ierr);
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatView_MPIDense"
740 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
741 {
742   PetscErrorCode ierr;
743   PetscTruth     iascii,isbinary,isdraw,issocket;
744 
745   PetscFunctionBegin;
746 
747   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
748   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
749   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
751 
752   if (iascii || issocket || isdraw) {
753     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
754   } else if (isbinary) {
755     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
756   } else {
757     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatGetInfo_MPIDense"
764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
765 {
766   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
767   Mat            mdn = mat->A;
768   PetscErrorCode ierr;
769   PetscReal      isend[5],irecv[5];
770 
771   PetscFunctionBegin;
772   info->rows_global    = (double)A->rmap.N;
773   info->columns_global = (double)A->cmap.N;
774   info->rows_local     = (double)A->rmap.n;
775   info->columns_local  = (double)A->cmap.N;
776   info->block_size     = 1.0;
777   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
778   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779   isend[3] = info->memory;  isend[4] = info->mallocs;
780   if (flag == MAT_LOCAL) {
781     info->nz_used      = isend[0];
782     info->nz_allocated = isend[1];
783     info->nz_unneeded  = isend[2];
784     info->memory       = isend[3];
785     info->mallocs      = isend[4];
786   } else if (flag == MAT_GLOBAL_MAX) {
787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
788     info->nz_used      = irecv[0];
789     info->nz_allocated = irecv[1];
790     info->nz_unneeded  = irecv[2];
791     info->memory       = irecv[3];
792     info->mallocs      = irecv[4];
793   } else if (flag == MAT_GLOBAL_SUM) {
794     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
795     info->nz_used      = irecv[0];
796     info->nz_allocated = irecv[1];
797     info->nz_unneeded  = irecv[2];
798     info->memory       = irecv[3];
799     info->mallocs      = irecv[4];
800   }
801   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
802   info->fill_ratio_needed = 0;
803   info->factor_mallocs    = 0;
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatSetOption_MPIDense"
809 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
810 {
811   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
812   PetscErrorCode ierr;
813 
814   PetscFunctionBegin;
815   switch (op) {
816   case MAT_NEW_NONZERO_LOCATIONS:
817   case MAT_NEW_NONZERO_LOCATION_ERR:
818   case MAT_NEW_NONZERO_ALLOCATION_ERR:
819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
820     break;
821   case MAT_ROW_ORIENTED:
822     a->roworiented = flg;
823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
824     break;
825   case MAT_NEW_DIAGONALS:
826   case MAT_USE_HASH_TABLE:
827     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
828     break;
829   case MAT_IGNORE_OFF_PROC_ENTRIES:
830     a->donotstash = flg;
831     break;
832   case MAT_SYMMETRIC:
833   case MAT_STRUCTURALLY_SYMMETRIC:
834   case MAT_HERMITIAN:
835   case MAT_SYMMETRY_ETERNAL:
836   case MAT_IGNORE_LOWER_TRIANGULAR:
837     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
838     break;
839   default:
840     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDiagonalScale_MPIDense"
848 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
849 {
850   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
851   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852   PetscScalar    *l,*r,x,*v;
853   PetscErrorCode ierr;
854   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
855 
856   PetscFunctionBegin;
857   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
858   if (ll) {
859     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
860     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
861     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
862     for (i=0; i<m; i++) {
863       x = l[i];
864       v = mat->v + i;
865       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
866     }
867     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
868     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
869   }
870   if (rr) {
871     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
872     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
873     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
876     for (i=0; i<n; i++) {
877       x = r[i];
878       v = mat->v + i*m;
879       for (j=0; j<m; j++) { (*v++) *= x;}
880     }
881     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
882     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
883   }
884   PetscFunctionReturn(0);
885 }
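/*
   A usage sketch for MatDiagonalScale() (illustrative only): ll uses the row
   layout and rr the column layout of A; PETSC_NULL skips that side:

     ierr = MatDiagonalScale(A,ll,PETSC_NULL);CHKERRQ(ierr);   // A = diag(ll) * A
     ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr);   // A = A * diag(rr)
*/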
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatNorm_MPIDense"
889 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
890 {
891   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
892   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
893   PetscErrorCode ierr;
894   PetscInt       i,j;
895   PetscReal      sum = 0.0;
896   PetscScalar    *v = mat->v;
897 
898   PetscFunctionBegin;
899   if (mdn->size == 1) {
900     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
901   } else {
902     if (type == NORM_FROBENIUS) {
903       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
904 #if defined(PETSC_USE_COMPLEX)
905         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
906 #else
907         sum += (*v)*(*v); v++;
908 #endif
909       }
910       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
911       *nrm = sqrt(*nrm);
912       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
913     } else if (type == NORM_1) {
914       PetscReal *tmp,*tmp2;
915       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
916       tmp2 = tmp + A->cmap.N;
917       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
918       *nrm = 0.0;
919       v = mat->v;
920       for (j=0; j<mdn->A->cmap.n; j++) {
921         for (i=0; i<mdn->A->rmap.n; i++) {
922           tmp[j] += PetscAbsScalar(*v);  v++;
923         }
924       }
925       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
926       for (j=0; j<A->cmap.N; j++) {
927         if (tmp2[j] > *nrm) *nrm = tmp2[j];
928       }
929       ierr = PetscFree(tmp);CHKERRQ(ierr);
930       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
931     } else if (type == NORM_INFINITY) { /* max row norm */
932       PetscReal ntemp;
933       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
934       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
935     } else {
936       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
937     }
938   }
939   PetscFunctionReturn(0);
940 }
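/*
   A usage sketch for the norms supported above (illustrative only); NORM_2 is
   rejected for parallel dense matrices:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);          // max column sum
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);   // max row sum
*/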
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "MatTranspose_MPIDense"
944 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
945 {
946   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
947   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
948   Mat            B;
949   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
950   PetscErrorCode ierr;
951   PetscInt       j,i;
952   PetscScalar    *v;
953 
954   PetscFunctionBegin;
955   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
956   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
957     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
958     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
959     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
960     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
961   } else {
962     B = *matout;
963   }
964 
965   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
966   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
967   for (i=0; i<m; i++) rwork[i] = rstart + i;
968   for (j=0; j<n; j++) {
969     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
970     v   += m;
971   }
972   ierr = PetscFree(rwork);CHKERRQ(ierr);
973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
975   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
976     *matout = B;
977   } else {
978     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
979   }
980   PetscFunctionReturn(0);
981 }
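/*
   A usage sketch for MatTranspose() (illustrative only), assuming the
   (Mat,MatReuse,Mat*) calling sequence used above; in-place reuse requires a
   square matrix, as checked at the top of the routine:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);   // square A only
*/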
982 
983 #include "petscblaslapack.h"
984 #undef __FUNCT__
985 #define __FUNCT__ "MatScale_MPIDense"
986 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
987 {
988   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
989   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
990   PetscScalar    oalpha = alpha;
991   PetscErrorCode ierr;
992   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
993 
994   PetscFunctionBegin;
995   BLASscal_(&nz,&oalpha,a->v,&one);
996   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 
1000 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1004 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1005 {
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #if defined(PETSC_HAVE_PLAPACK)
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatMPIDenseCreatePlapack"
1017 PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
1018 {
1019   Mat_Plapack    *lu;
1020   PetscErrorCode ierr;
1021   PetscInt       M=A->rmap.N;
1022   MPI_Comm       comm=((PetscObject)A)->comm;
1023   PetscMPIInt    size;
1024 
1025   PetscFunctionBegin;
1026   if (A->spptr) PetscFunctionReturn(0);
1027   ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
1028   A->spptr = (void*)lu;
1031 
1032   /* Set default Plapack parameters */
1033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1034   lu->nb     = M/size;
1035   if (M - lu->nb*size) lu->nb++; /* round up to ceil(M/size) so the distribution is not cyclic */
1036 
1037   /* Set runtime options */
1038   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1039     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1040   PetscOptionsEnd();
1041 
1042   /* Create object distribution template */
1043   lu->templ = NULL;
1044   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1045 
1046   /* Set the datatype */
1047 #if defined(PETSC_USE_COMPLEX)
1048   lu->datatype = MPI_DOUBLE_COMPLEX;
1049 #else
1050   lu->datatype = MPI_DOUBLE;
1051 #endif
1052 
1053   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1054 
1055 
1056   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1057   PetscFunctionReturn(0);
1058 }
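/*
   The default block size above is ceil(M/size); for example, M = 10 rows on
   size = 4 processes gives 10/4 = 2 with a remainder, so nb is bumped to 3
   and the template covers all rows without cyclic wraparound.
*/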
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1062 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1063 {
1064   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1065   PetscErrorCode ierr;
1066   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1067   PetscScalar    *array;
1068   PetscReal      one = 1.0;
1069 
1070   PetscFunctionBegin;
1071   /* Copy A into F->lu->A */
1072   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1073   ierr = PLA_API_begin();CHKERRQ(ierr);
1074   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1075   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1076   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1077   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1078   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1079   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1080   ierr = PLA_API_end();CHKERRQ(ierr);
1081   lu->rstart = rstart;
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1087 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1088 {
1089   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1090   PetscErrorCode ierr;
1091   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1092   PetscScalar    *array;
1093   PetscReal      one = 1.0;
1094 
1095   PetscFunctionBegin;
1096   /* Copy F into A->lu->A */
1097   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1098   ierr = PLA_API_begin();CHKERRQ(ierr);
1099   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1100   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1101   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1102   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1103   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1104   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1105   ierr = PLA_API_end();CHKERRQ(ierr);
1106   lu->rstart = rstart;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1112 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1113 {
1114   PetscErrorCode ierr;
1115   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1116   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1117   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1118   PLA_Obj        alpha = NULL,beta = NULL;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1122   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1123 
1124   /*
1125   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1126   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1127   */
1128 
1129   /* do the multiply in PLA  */
1130   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1131   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1132   CHKMEMQ;
1133 
1134   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1135   CHKMEMQ;
1136   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1137   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1138 
1139   /*
1140   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1141   */
1142   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1148 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1149 {
1150   PetscErrorCode ierr;
1151   PetscInt       m=A->rmap.n,n=B->cmap.n;
1152   Mat            Cmat;
1153 
1154   PetscFunctionBegin;
1155   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d",A->cmap.n,B->rmap.n);
1156   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1157   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1158   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1159   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1160   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1161   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1162   ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
1163   ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
1164   ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
1165   *C = Cmat;
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1171 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1172 {
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   if (scall == MAT_INITIAL_MATRIX){
1177     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1178   }
1179   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
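/*
   A usage sketch for the PLAPACK matrix-matrix product (illustrative only);
   note that the symbolic stage above currently raises an error unconditionally:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/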
1182 
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSolve_MPIDense"
1186 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1187 {
1188   MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
1189   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1190   PetscErrorCode ierr;
1191   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1192   PetscScalar    *array;
1193   PetscReal      one = 1.0;
1194   PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
1195   PLA_Obj        v_pla = NULL;
1196   PetscScalar    *loc_buf;
1197   Vec            loc_x;
1198 
1199   PetscFunctionBegin;
1200   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1201 
1202   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1203   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1204   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1205 
1206   /* Copy b into rhs_pla */
1207   ierr = PLA_API_begin();CHKERRQ(ierr);
1208   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1209   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1210   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1211   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1212   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1213   ierr = PLA_API_end();CHKERRQ(ierr);
1214 
1215   if (A->factor == FACTOR_LU){
1216     /* Apply the permutations to the right hand sides */
1217     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1218 
1219     /* Solve L y = b, overwriting b with y */
1220     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1221 
1222     /* Solve U x = y (=b), overwriting b with x */
1223     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1224   } else { /* FACTOR_CHOLESKY */
1225     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1226     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1227   }
1228 
1229   /* Copy PLAPACK x into Petsc vector x  */
1230   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1231   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1232   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1233   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1234   if (!lu->pla_solved){
1235 
1236     ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
1237     ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
1238     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1239 
1240     /* Create the index sets and context for the VecScatter */
1241     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1242     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1243     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1244     idx_petsc = idx_pla + loc_m;
1245 
1246     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1247     for (i=0; i<loc_m; i+=lu->nb){
1248       j = 0;
1249       while (j < lu->nb && i+j < loc_m){
1250         idx_petsc[i+j] = rstart + j; j++;
1251       }
1252       rstart += size*lu->nb;
1253     }
1254 
1255     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1256 
1257     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1258     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1259     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1260     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1261   }
1262   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264 
1265   /* Free data */
1266   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1267   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1268 
1269   lu->pla_solved = PETSC_TRUE;
1270   PetscFunctionReturn(0);
1271 }
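/*
   A factor-and-solve sketch that reaches MatSolve_MPIDense() (illustrative
   only, assuming PLAPACK is enabled); the r and c orderings are ignored by
   this implementation, so PETSC_NULL is passed:

     Mat           F;
     MatFactorInfo info;
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(A,PETSC_NULL,PETSC_NULL,&info,&F);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(A,&info,&F);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
*/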
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1275 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1276 {
1277   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1278   PetscErrorCode ierr;
1279   PetscInt       info_pla;
1280 
1281   PetscFunctionBegin;
1282   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1283 
1284   /* Copy A into F->lu->A */
1285   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1286 
1287   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1288   info_pla = PLA_LU(lu->A,lu->pivots);
1289   if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1290 
1291   (*F)->assembled    = PETSC_TRUE;
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1297 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1298 {
1299   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1300   PetscErrorCode ierr;
1301   PetscInt       info_pla;
1302 
1303   PetscFunctionBegin;
1304 
1305   /* Copy A into F->lu->A */
1306   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1307 
1308   /* Factor P A -> Chol */
1309   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1310   if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1311 
1312   (*F)->assembled = PETSC_TRUE;
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1319 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1320 {
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   /* Create the factorization matrix */
1325   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1326   ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1327   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1328   ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 /* Note the Petsc r and c permutations are ignored */
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1335 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1336 {
1337   PetscErrorCode ierr;
1338   PetscInt       M = A->rmap.N;
1339   Mat_Plapack    *lu;
1340 
1341   PetscFunctionBegin;
1342   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1343   lu = (Mat_Plapack*)(*F)->spptr;
1344   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1345   (*F)->factor = FACTOR_LU;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 /* Note the Petsc perm permutation is ignored */
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1352 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1353 {
1354   PetscErrorCode ierr;
1355   PetscTruth     issymmetric,set;
1356 
1357   PetscFunctionBegin;
1358   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1359   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1360   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1361   (*F)->factor = FACTOR_CHOLESKY;
1362   PetscFunctionReturn(0);
1363 }
1364 #endif
1365 
1366 /* -------------------------------------------------------------------*/
1367 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1368        MatGetRow_MPIDense,
1369        MatRestoreRow_MPIDense,
1370        MatMult_MPIDense,
1371 /* 4*/ MatMultAdd_MPIDense,
1372        MatMultTranspose_MPIDense,
1373        MatMultTransposeAdd_MPIDense,
1374 #if defined(PETSC_HAVE_PLAPACK)
1375        MatSolve_MPIDense,
1376 #else
1377        0,
1378 #endif
1379        0,
1380        0,
1381 /*10*/ 0,
1382        0,
1383        0,
1384        0,
1385        MatTranspose_MPIDense,
1386 /*15*/ MatGetInfo_MPIDense,
1387        MatEqual_MPIDense,
1388        MatGetDiagonal_MPIDense,
1389        MatDiagonalScale_MPIDense,
1390        MatNorm_MPIDense,
1391 /*20*/ MatAssemblyBegin_MPIDense,
1392        MatAssemblyEnd_MPIDense,
1393        0,
1394        MatSetOption_MPIDense,
1395        MatZeroEntries_MPIDense,
1396 /*25*/ MatZeroRows_MPIDense,
1397 #if defined(PETSC_HAVE_PLAPACK)
1398        MatLUFactorSymbolic_MPIDense,
1399        MatLUFactorNumeric_MPIDense,
1400        MatCholeskyFactorSymbolic_MPIDense,
1401        MatCholeskyFactorNumeric_MPIDense,
1402 #else
1403        0,
1404        0,
1405        0,
1406        0,
1407 #endif
1408 /*30*/ MatSetUpPreallocation_MPIDense,
1409        0,
1410        0,
1411        MatGetArray_MPIDense,
1412        MatRestoreArray_MPIDense,
1413 /*35*/ MatDuplicate_MPIDense,
1414        0,
1415        0,
1416        0,
1417        0,
1418 /*40*/ 0,
1419        MatGetSubMatrices_MPIDense,
1420        0,
1421        MatGetValues_MPIDense,
1422        0,
1423 /*45*/ 0,
1424        MatScale_MPIDense,
1425        0,
1426        0,
1427        0,
1428 /*50*/ 0,
1429        0,
1430        0,
1431        0,
1432        0,
1433 /*55*/ 0,
1434        0,
1435        0,
1436        0,
1437        0,
1438 /*60*/ MatGetSubMatrix_MPIDense,
1439        MatDestroy_MPIDense,
1440        MatView_MPIDense,
1441        0,
1442        0,
1443 /*65*/ 0,
1444        0,
1445        0,
1446        0,
1447        0,
1448 /*70*/ 0,
1449        0,
1450        0,
1451        0,
1452        0,
1453 /*75*/ 0,
1454        0,
1455        0,
1456        0,
1457        0,
1458 /*80*/ 0,
1459        0,
1460        0,
1461        0,
1462 /*84*/ MatLoad_MPIDense,
1463        0,
1464        0,
1465        0,
1466        0,
1467        0,
1468 /*90*/
1469 #if defined(PETSC_HAVE_PLAPACK)
1470        MatMatMult_MPIDense_MPIDense,
1471        MatMatMultSymbolic_MPIDense_MPIDense,
1472        MatMatMultNumeric_MPIDense_MPIDense,
1473 #else
1474        0,
1475        0,
1476        0,
1477 #endif
1478        0,
1479 /*95*/ 0,
1480        0,
1481        0,
1482        0};
1483 
1484 EXTERN_C_BEGIN
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1487 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1488 {
1489   Mat_MPIDense   *a;
1490   PetscErrorCode ierr;
1491 
1492   PetscFunctionBegin;
1493   mat->preallocated = PETSC_TRUE;
1494   /* Note: for now, when data is non-null this assumes the user has correctly
1495      allocated the local dense storage space; error checking should be added. */
1496 
1497   a    = (Mat_MPIDense*)mat->data;
1498   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1499   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1500   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1501   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1502   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 EXTERN_C_END
1506 
1507 EXTERN_C_BEGIN
1508 #if defined(PETSC_HAVE_PLAPACK)
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1511 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1512 {
1513   PetscFunctionBegin;
1514   /* Dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") silently works (PLAPACK is used by default when available) */
1515   PetscFunctionReturn(0);
1516 }
1517 #endif
1518 EXTERN_C_END
1519 
1520 /*MC
1521    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1522 
1523    Options Database Keys:
1524 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1525 
1526   Level: beginner
1527 
1528   MATMPIDENSE matrices may use parallel direct solvers (LU and Cholesky)
1529   via the external package PLAPACK, if PLAPACK is installed
1530   (run config/configure.py with the option --download-plapack).
1531 
1532 
1533   Options Database Keys for the PLAPACK solvers:
1534 . -plapack_nprows <n> - number of rows in the processor partition
1535 . -plapack_npcols <n> - number of columns in the processor partition
1536 . -mat_plapack_nb <n> - block size of template vector
1537 . -plapack_nb_alg <n> - algorithmic block size
1538 - -plapack_ckerror <n> - error checking flag
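
  Example usage (a minimal sketch: error checking with CHKERRQ() is omitted, and
  M and N stand for the desired global dimensions):
.vb
  Mat A;
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
  MatSetType(A,MATMPIDENSE);
  MatMPIDenseSetPreallocation(A,PETSC_NULL);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve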
1539 
1540 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1541 M*/
1542 
1543 EXTERN_C_BEGIN
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatCreate_MPIDense"
1546 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1547 {
1548   Mat_MPIDense   *a;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1553   mat->data         = (void*)a;
1554   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1555   mat->factor       = 0;
1556   mat->mapping      = 0;
1557 
1558   mat->insertmode = NOT_SET_VALUES;
1559   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1560   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1561 
1562   mat->rmap.bs = mat->cmap.bs = 1;
1563   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1564   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1565   a->nvec = mat->cmap.n;
1566 
1567   /* build cache for off array entries formed */
1568   a->donotstash = PETSC_FALSE;
1569   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1570 
1571   /* stuff used for matrix vector multiply */
1572   a->lvec        = 0;
1573   a->Mvctx       = 0;
1574   a->roworiented = PETSC_TRUE;
1575 
1576   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1577                                      "MatGetDiagonalBlock_MPIDense",
1578                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1579   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1580                                      "MatMPIDenseSetPreallocation_MPIDense",
1581                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1582   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1583                                      "MatMatMult_MPIAIJ_MPIDense",
1584                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1586                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1587                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1589                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1590                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1591 #if defined(PETSC_HAVE_PLAPACK)
1592   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1593                                      "MatSetSolverType_MPIDense_PLAPACK",
1594                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1595 #endif
1596   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1597 
1598   PetscFunctionReturn(0);
1599 }
1600 EXTERN_C_END
1601 
1602 /*MC
1603    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1604 
1605    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1606    and MATMPIDENSE otherwise.
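
   For example, the following sketch (error checking omitted; M and N stand for
   the desired global dimensions) yields a MATSEQDENSE matrix on one process and
   a MATMPIDENSE matrix on several:
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATDENSE);
.ve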
1607 
1608    Options Database Keys:
1609 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1610 
1611   Level: beginner
1612 
1613 
1614 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1615 M*/
1616 
1617 EXTERN_C_BEGIN
1618 #undef __FUNCT__
1619 #define __FUNCT__ "MatCreate_Dense"
1620 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1621 {
1622   PetscErrorCode ierr;
1623   PetscMPIInt    size;
1624 
1625   PetscFunctionBegin;
1626   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1627   if (size == 1) {
1628     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1629   } else {
1630     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1638 /*@C
1639    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1640 
1641    Not collective
1642 
1643    Input Parameters:
1644 +  A - the matrix
1645 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1646    to control all matrix memory allocation.
1647 
1648    Notes:
1649    The dense format is fully compatible with standard Fortran 77
1650    storage by columns.
1651 
1652    The data input variable is intended primarily for Fortran programmers
1653    who wish to allocate their own matrix memory space.  Most users should
1654    set data=PETSC_NULL.
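
   A minimal usage sketch (assumes comm, m, n, M, and N are already defined;
   error checking with CHKERRQ() omitted):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIDENSE);
   MatMPIDenseSetPreallocation(A,PETSC_NULL);
.ve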
1655 
1656    Level: intermediate
1657 
1658 .keywords: matrix,dense, parallel
1659 
1660 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1661 @*/
1662 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1663 {
1664   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1665 
1666   PetscFunctionBegin;
1667   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1668   if (f) {
1669     ierr = (*f)(mat,data);CHKERRQ(ierr);
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatCreateMPIDense"
1676 /*@C
1677    MatCreateMPIDense - Creates a parallel matrix in dense format.
1678 
1679    Collective on MPI_Comm
1680 
1681    Input Parameters:
1682 +  comm - MPI communicator
1683 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1684 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1685 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1686 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1687 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1688    to control all matrix memory allocation.
1689 
1690    Output Parameter:
1691 .  A - the matrix
1692 
1693    Notes:
1694    The dense format is fully compatible with standard Fortran 77
1695    storage by columns.
1696 
1697    The data input variable is intended primarily for Fortran programmers
1698    who wish to allocate their own matrix memory space.  Most users should
1699    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1700 
1701    The user MUST specify either the local or global matrix dimensions
1702    (possibly both).
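
   For example, a minimal sketch that lets PETSc choose the local decomposition
   and manage all storage (M and N stand for the desired global dimensions):
.vb
   Mat A;
   MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
.ve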
1703 
1704    Level: intermediate
1705 
1706 .keywords: matrix,dense, parallel
1707 
1708 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1709 @*/
1710 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1711 {
1712   PetscErrorCode ierr;
1713   PetscMPIInt    size;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1717   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1718   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1719   if (size > 1) {
1720     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1721     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1722   } else {
1723     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1724     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatDuplicate_MPIDense"
1731 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1732 {
1733   Mat            mat;
1734   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   *newmat       = 0;
1739   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1740   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1741   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1742   a                 = (Mat_MPIDense*)mat->data;
1743   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1744   mat->factor       = A->factor;
1745   mat->assembled    = PETSC_TRUE;
1746   mat->preallocated = PETSC_TRUE;
1747 
1748   mat->rmap.rstart     = A->rmap.rstart;
1749   mat->rmap.rend       = A->rmap.rend;
1750   a->size              = oldmat->size;
1751   a->rank              = oldmat->rank;
1752   mat->insertmode      = NOT_SET_VALUES;
1753   a->nvec              = oldmat->nvec;
1754   a->donotstash        = oldmat->donotstash;
1755 
1756   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1757   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1758   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1759 
1760   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1761   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1762   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1763 
1764 #if defined(PETSC_HAVE_PLAPACK)
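  /* shallow copy of the Mat_Plapack context; this assumes MatSetType() above
     allocated spptr for the new matrix */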
1765   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1766 #endif
1767   *newmat = mat;
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #include "petscsys.h"
1772 
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1775 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1776 {
1777   PetscErrorCode ierr;
1778   PetscMPIInt    rank,size;
1779   PetscInt       *rowners,i,m,nz,j;
1780   PetscScalar    *array,*vals,*vals_ptr;
1781   MPI_Status     status;
1782 
1783   PetscFunctionBegin;
1784   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1785   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1786 
1787   /* determine ownership of all rows */
1788   m          = M/size + ((M % size) > rank);
1789   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1790   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
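  /* rowners[1..size] now hold each process's local row count; the scan below
     converts these to ownership offsets (rowners[1] already equals the offset
     of rank 1, so the accumulation starts at i=2) */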
1791   rowners[0] = 0;
1792   for (i=2; i<=size; i++) {
1793     rowners[i] += rowners[i-1];
1794   }
1795 
1796   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1797   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1798   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1799   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1800   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1801 
1802   if (!rank) {
1803     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1804 
1805     /* read in my part of the matrix numerical values  */
1806     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1807 
1808     /* insert into the matrix row by row (the local array is stored by columns, so we cannot read the file directly into it) */
1809     vals_ptr = vals;
1810     for (i=0; i<m; i++) {
1811       for (j=0; j<N; j++) {
1812         array[i + j*m] = *vals_ptr++;
1813       }
1814     }
1815 
1816     /* read in the other processors' portions and ship them out */
1817     for (i=1; i<size; i++) {
1818       nz   = (rowners[i+1] - rowners[i])*N;
1819       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1820       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1821     }
1822   } else {
1823     /* receive numeric values */
1824     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1825 
1826     /* receive message of values */
1827     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1828 
1829     /* insert into the matrix row by row (the local array is stored by columns, so we cannot receive directly into it) */
1830     vals_ptr = vals;
1831     for (i=0; i<m; i++) {
1832       for (j=0; j<N; j++) {
1833         array[i + j*m] = *vals_ptr++;
1834       }
1835     }
1836   }
1837   ierr = PetscFree(rowners);CHKERRQ(ierr);
1838   ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = MatRestoreArray(*newmat,&array);CHKERRQ(ierr); /* pair with the MatGetArray() above */
1839   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1840   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatLoad_MPIDense"
1846 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1847 {
1848   Mat            A;
1849   PetscScalar    *vals,*svals;
1850   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1851   MPI_Status     status;
1852   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1853   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1854   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1855   PetscInt       i,nz,j,rstart,rend;
1856   int            fd;
1857   PetscErrorCode ierr;
1858 
1859   PetscFunctionBegin;
1860   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1861   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1862   if (!rank) {
1863     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1864     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1865     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1866   }
1867 
1868   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
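  /* header layout: [0] = MAT_FILE_COOKIE, [1] = global rows, [2] = global columns,
     [3] = total number of nonzeros, or MATRIX_BINARY_FORMAT_DENSE when the values
     are stored on disk as a dense array */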
1869   M = header[1]; N = header[2]; nz = header[3];
1870 
1871   /*
1872        Handle case where the matrix is stored on disk as a dense matrix (fd is set, and used, only on rank 0)
1873   */
1874   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1875     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1876     PetscFunctionReturn(0);
1877   }
1878 
1879   /* determine ownership of all rows */
1880   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1881   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1882   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1883   rowners[0] = 0;
1884   for (i=2; i<=size; i++) {
1885     rowners[i] += rowners[i-1];
1886   }
1887   rstart = rowners[rank];
1888   rend   = rowners[rank+1];
1889 
1890   /* distribute row lengths to all processors */
1891   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1892   offlens = ourlens + (rend-rstart);
1893   if (!rank) {
1894     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1895     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1896     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1897     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1898     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1899     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1900   } else {
1901     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1902   }
1903 
1904   if (!rank) {
1905     /* calculate the number of nonzeros on each processor */
1906     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1907     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1908     for (i=0; i<size; i++) {
1909       for (j=rowners[i]; j< rowners[i+1]; j++) {
1910         procsnz[i] += rowlengths[j];
1911       }
1912     }
1913     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1914 
1915     /* determine max buffer needed and allocate it */
1916     maxnz = 0;
1917     for (i=0; i<size; i++) {
1918       maxnz = PetscMax(maxnz,procsnz[i]);
1919     }
1920     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1921 
1922     /* read in my part of the matrix column indices  */
1923     nz = procsnz[0];
1924     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1925     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1926 
1927     /* read in everyone else's and ship it off */
1928     for (i=1; i<size; i++) {
1929       nz   = procsnz[i];
1930       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1931       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1932     }
1933     ierr = PetscFree(cols);CHKERRQ(ierr);
1934   } else {
1935     /* determine buffer space needed for message */
1936     nz = 0;
1937     for (i=0; i<m; i++) {
1938       nz += ourlens[i];
1939     }
1940     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1941 
1942     /* receive message of column indices */
1943     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1944     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1945     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1946   }
1947 
1948   /* loop over local rows, determining number of off diagonal entries */
1949   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1950   jj = 0;
1951   for (i=0; i<m; i++) {
1952     for (j=0; j<ourlens[i]; j++) {
1953       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1954       jj++;
1955     }
1956   }
1957 
1958   /* create our matrix */
1959   for (i=0; i<m; i++) {
1960     ourlens[i] -= offlens[i];
1961   }
1962   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1963   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1964   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1965   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1966   A = *newmat;
1967   for (i=0; i<m; i++) {
1968     ourlens[i] += offlens[i];
1969   }
1970 
1971   if (!rank) {
1972     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1973 
1974     /* read in my part of the matrix numerical values  */
1975     nz = procsnz[0];
1976     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1977 
1978     /* insert into matrix */
1979     jj      = rstart;
1980     smycols = mycols;
1981     svals   = vals;
1982     for (i=0; i<m; i++) {
1983       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1984       smycols += ourlens[i];
1985       svals   += ourlens[i];
1986       jj++;
1987     }
1988 
1989     /* read in the other processors' portions and ship them out */
1990     for (i=1; i<size; i++) {
1991       nz   = procsnz[i];
1992       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1993       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1994     }
1995     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1996   } else {
1997     /* receive numeric values */
1998     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1999 
2000     /* receive message of values */
2001     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2002     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2003     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2004 
2005     /* insert into matrix */
2006     jj      = rstart;
2007     smycols = mycols;
2008     svals   = vals;
2009     for (i=0; i<m; i++) {
2010       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2011       smycols += ourlens[i];
2012       svals   += ourlens[i];
2013       jj++;
2014     }
2015   }
2016   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2017   ierr = PetscFree(vals);CHKERRQ(ierr);
2018   ierr = PetscFree(mycols);CHKERRQ(ierr);
2019   ierr = PetscFree(rowners);CHKERRQ(ierr);
2020 
2021   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2022   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatEqual_MPIDense"
2028 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2029 {
2030   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2031   Mat            a,b;
2032   PetscTruth     flg;
2033   PetscErrorCode ierr;
2034 
2035   PetscFunctionBegin;
2036   a = matA->A;
2037   b = matB->A;
2038   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2039   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 #if defined(PETSC_HAVE_PLAPACK)
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2047 /*@C
2048   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
2049   Level: developer
2050 
2051 .keywords: Petsc, destroy, package, PLAPACK
2052 .seealso: PetscFinalize()
2053 @*/
2054 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2055 {
2056   PetscErrorCode ierr;
2057 
2058   PetscFunctionBegin;
2059   ierr = PLA_Finalize();CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2065 /*@C
2066   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
2067   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
2068   when using static libraries.
2069 
2070   Input Parameter:
2071 . path - The dynamic library path, or PETSC_NULL
2072 
2073   Level: developer
2074 
2075 .keywords: Petsc, initialize, package, PLAPACK
2076 .seealso: PetscInitializePackage(), PetscInitialize()
2077 @*/
2078 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
2079 {
2080   MPI_Comm       comm = PETSC_COMM_WORLD;
2081   PetscMPIInt    size;
2082   PetscErrorCode ierr;
2083 
2084   PetscFunctionBegin;
2085   if (!PLA_Initialized(PETSC_NULL)) {
2086 
2087     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2088     Plapack_nprows = 1;
2089     Plapack_npcols = size;
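    /* default to a 1 x size process mesh; user-supplied values should
       presumably satisfy nprows*npcols == size for PLA_Comm_1D_to_2D() below */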
2090 
2091     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2092       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2093       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2094 #if defined(PETSC_USE_DEBUG)
2095       Plapack_ierror = 3;
2096 #else
2097       Plapack_ierror = 0;
2098 #endif
2099       ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2100       if (Plapack_ierror) {
2101         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2102       } else {
2103         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2104       }
2105 
2106       Plapack_nb_alg = 0;
2107       ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2108       if (Plapack_nb_alg) {
2109         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2110       }
2111     PetscOptionsEnd();
2112 
2113     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2114     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2115     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2116   }
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 
2121 #endif
2122