xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 79b0a62de21de044427c1942db46c1b7385760ac)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17 
18       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
19               matrix that stores the local entries. For a sequential matrix it returns the matrix itself.
20 
21     Input Parameter:
22 .      A - the Seq or MPI dense matrix
23 
24     Output Parameter:
25 .      B - the inner matrix
26 
27     Level: intermediate
28 
29 @*/
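/*
   A minimal usage sketch (illustrative only; the names A, Aloc, and ierr are assumptions,
   not part of this file).  After a MATMPIDENSE matrix A is assembled, each process can
   examine its local block, stored column-major, through the returned sequential matrix:

      Mat         Aloc;
      PetscScalar *v;
      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetArray(Aloc,&v);CHKERRQ(ierr);
      ierr = MatRestoreArray(Aloc,&v);CHKERRQ(ierr);
*/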
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap.N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
190   PetscScalar    *av,*bv,*v = lmat->v;
191   Mat            newmat;
192 
193   PetscFunctionBegin;
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198 
199   /* No parallel redistribution is currently supported! We should really check each index set
200      to confirm that it is OK.  ... Currently only submatrices with the same row partitioning
201      as the original matrix are supported! */
202 
203   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
204   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
205 
206   /* Check submatrix call */
207   if (scall == MAT_REUSE_MATRIX) {
208     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
209     /* Really need to test rows and column sizes! */
210     newmat = *B;
211   } else {
212     /* Create and fill new matrix */
213     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
214     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
215     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
216     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
217   }
218 
219   /* Now extract the data pointers and do the copy, column at a time */
220   newmatd = (Mat_MPIDense*)newmat->data;
221   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
222 
223   for (i=0; i<ncols; i++) {
224     av = v + nlrows*icol[i];
225     for (j=0; j<nrows; j++) {
226       *bv++ = av[irow[j] - rstart];
227     }
228   }
229 
230   /* Assemble the matrices so that the correct flags are set */
231   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233 
234   /* Free work space */
235   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
236   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
237   *B = newmat;
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatRestoreArray_MPIDense"
243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
244 {
245   PetscFunctionBegin;
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
252 {
253   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
254   MPI_Comm       comm = ((PetscObject)mat)->comm;
255   PetscErrorCode ierr;
256   PetscInt       nstash,reallocs;
257   InsertMode     addv;
258 
259   PetscFunctionBegin;
260   /* make sure all processes are using the same insert mode (either INSERT_VALUES or ADD_VALUES) */
261   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
262   if (addv == (ADD_VALUES|INSERT_VALUES)) {
263     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
264   }
265   mat->insertmode = addv; /* in case this processor had no cache */
266 
267   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
268   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
269   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
276 {
277   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
278   PetscErrorCode  ierr;
279   PetscInt        i,*row,*col,flg,j,rstart,ncols;
280   PetscMPIInt     n;
281   PetscScalar     *val;
282   InsertMode      addv=mat->insertmode;
283 
284   PetscFunctionBegin;
285   /*  wait on receives */
286   while (1) {
287     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
288     if (!flg) break;
289 
290     for (i=0; i<n;) {
291       /* Now identify the consecutive vals belonging to the same row */
292       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
293       if (j < n) ncols = j-i;
294       else       ncols = n-i;
295       /* Now assemble all these values with a single function call */
296       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
297       i = j;
298     }
299   }
300   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
301 
302   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
304 
305   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
306     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /* The code does not handle the diagonal entries correctly unless the
324    matrix is square and the column and row ownerships are identical.
325    This is a BUG. The only way to fix it seems to be to access
326    the inner matrix mdn->A directly rather than going through the
327    MatZeroRows() routine.
328 */
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroRows_MPIDense"
331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
332 {
333   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
334   PetscErrorCode ierr;
335   PetscInt       i,*owners = A->rmap.range;
336   PetscInt       *nprocs,j,idx,nsends;
337   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
338   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
339   PetscInt       *lens,*lrows,*values;
340   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
341   MPI_Comm       comm = ((PetscObject)A)->comm;
342   MPI_Request    *send_waits,*recv_waits;
343   MPI_Status     recv_status,*send_status;
344   PetscTruth     found;
345 
346   PetscFunctionBegin;
347   /*  first count number of contributors to each processor */
348   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
349   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
351   for (i=0; i<N; i++) {
352     idx = rows[i];
353     found = PETSC_FALSE;
354     for (j=0; j<size; j++) {
355       if (idx >= owners[j] && idx < owners[j+1]) {
356         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
357       }
358     }
359     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
360   }
361   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
362 
363   /* inform other processors of number of messages and max length*/
364   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
365 
366   /* post receives:   */
367   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
368   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
369   for (i=0; i<nrecvs; i++) {
370     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
371   }
372 
373   /* do sends:
374       1) starts[i] gives the starting index in svalues of the entries destined for
375          the ith processor
376   */
377   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
378   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
379   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
380   starts[0]  = 0;
381   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
382   for (i=0; i<N; i++) {
383     svalues[starts[owner[i]]++] = rows[i];
384   }
385 
386   starts[0] = 0;
387   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
388   count = 0;
389   for (i=0; i<size; i++) {
390     if (nprocs[2*i+1]) {
391       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
392     }
393   }
394   ierr = PetscFree(starts);CHKERRQ(ierr);
395 
396   base = owners[rank];
397 
398   /*  wait on receives */
399   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
400   source = lens + nrecvs;
401   count  = nrecvs; slen = 0;
402   while (count) {
403     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
404     /* unpack receives into our local space */
405     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
406     source[imdex]  = recv_status.MPI_SOURCE;
407     lens[imdex]    = n;
408     slen += n;
409     count--;
410   }
411   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
412 
413   /* move the received row indices into the local list of rows to zero */
414   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
415   count = 0;
416   for (i=0; i<nrecvs; i++) {
417     values = rvalues + i*nmax;
418     for (j=0; j<lens[i]; j++) {
419       lrows[count++] = values[j] - base;
420     }
421   }
422   ierr = PetscFree(rvalues);CHKERRQ(ierr);
423   ierr = PetscFree(lens);CHKERRQ(ierr);
424   ierr = PetscFree(owner);CHKERRQ(ierr);
425   ierr = PetscFree(nprocs);CHKERRQ(ierr);
426 
427   /* actually zap the local rows */
428   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
429   ierr = PetscFree(lrows);CHKERRQ(ierr);
430 
431   /* wait on sends */
432   if (nsends) {
433     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
434     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
435     ierr = PetscFree(send_status);CHKERRQ(ierr);
436   }
437   ierr = PetscFree(send_waits);CHKERRQ(ierr);
438   ierr = PetscFree(svalues);CHKERRQ(ierr);
439 
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatMult_MPIDense"
445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
446 {
447   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
452   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatMultAdd_MPIDense"
459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatMultTranspose_MPIDense"
473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
474 {
475   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
476   PetscErrorCode ierr;
477   PetscScalar    zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(yy,zero);CHKERRQ(ierr);
481   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
482   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
483   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
492   PetscErrorCode ierr;
493 
494   PetscFunctionBegin;
495   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
496   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
497   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
498   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatGetDiagonal_MPIDense"
504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
505 {
506   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
507   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
508   PetscErrorCode ierr;
509   PetscInt       len,i,n,m = A->rmap.n,radd;
510   PetscScalar    *x,zero = 0.0;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(v,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
515   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
516   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
517   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
518   radd = A->rmap.rstart*m;
519   for (i=0; i<len; i++) {
520     x[i] = aloc->v[radd + i*m + i];
521   }
522   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatDestroy_MPIDense"
528 PetscErrorCode MatDestroy_MPIDense(Mat mat)
529 {
530   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
531   PetscErrorCode ierr;
532 #if defined(PETSC_HAVE_PLAPACK)
533   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
534 #endif
535 
536   PetscFunctionBegin;
537 
538 #if defined(PETSC_USE_LOG)
539   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
540 #endif
541   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
542   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
543   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
544   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
545 #if defined(PETSC_HAVE_PLAPACK)
546   if (lu) {
547     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
548     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
549     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
550 
551     if (lu->is_pla) {
552       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
553       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
554       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
555     }
556   }
557 #endif
558 
559   ierr = PetscFree(mdn);CHKERRQ(ierr);
560   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatView_MPIDense_Binary"
571 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
572 {
573   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
574   PetscErrorCode    ierr;
575   PetscViewerFormat format;
576   int               fd;
577   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
578   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
579   PetscScalar       *work,*v,*vv;
580   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
581   MPI_Status        status;
582 
583   PetscFunctionBegin;
584   if (mdn->size == 1) {
585     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
586   } else {
587     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
588     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
589     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
590 
591     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
592     if (format == PETSC_VIEWER_BINARY_NATIVE) {
593 
594       if (!rank) {
595 	/* store the matrix as a dense matrix */
596 	header[0] = MAT_FILE_COOKIE;
597 	header[1] = mat->rmap.N;
598 	header[2] = N;
599 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
600 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
601 
602 	/* get largest work array needed for transposing array */
603         mmax = mat->rmap.n;
604         for (i=1; i<size; i++) {
605           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
606         }
607 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
608 
609 	/* write out local array, by rows */
610         m    = mat->rmap.n;
611 	v    = a->v;
612         for (j=0; j<N; j++) {
613           for (i=0; i<m; i++) {
614 	    work[j + i*N] = *v++;
615 	  }
616 	}
617 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
618         /* get largest work array needed to receive messages from the other processes (process zero excluded) */
619         mmax = 0;
620         for (i=1; i<size; i++) {
621           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
622         }
623 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
624         v = vv;
625         for (k=1; k<size; k++) {
626           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
627           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
628 
629           for (j=0; j<N; j++) {
630             for (i=0; i<m; i++) {
631               work[j + i*N] = *v++;
632 	    }
633 	  }
634 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
635         }
636         ierr = PetscFree(work);CHKERRQ(ierr);
637         ierr = PetscFree(vv);CHKERRQ(ierr);
638       } else {
639         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
640       }
641     }
642   }
643   PetscFunctionReturn(0);
644 }
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
648 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
649 {
650   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
651   PetscErrorCode    ierr;
652   PetscMPIInt       size = mdn->size,rank = mdn->rank;
653   PetscViewerType   vtype;
654   PetscTruth        iascii,isdraw;
655   PetscViewer       sviewer;
656   PetscViewerFormat format;
657 #if defined(PETSC_HAVE_PLAPACK)
658   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
659 #endif
660 
661   PetscFunctionBegin;
662   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
663   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
664   if (iascii) {
665     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
666     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
667     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
668       MatInfo info;
669       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
670       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
671                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
672       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
673       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
674 #if defined(PETSC_HAVE_PLAPACK)
675       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
676       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
677       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
679       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
680 #endif
681       PetscFunctionReturn(0);
682     } else if (format == PETSC_VIEWER_ASCII_INFO) {
683       PetscFunctionReturn(0);
684     }
685   } else if (isdraw) {
686     PetscDraw  draw;
687     PetscTruth isnull;
688 
689     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
690     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
691     if (isnull) PetscFunctionReturn(0);
692   }
693 
694   if (size == 1) {
695     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
696   } else {
697     /* assemble the entire matrix onto the first processor */
698     Mat         A;
699     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
700     PetscInt    *cols;
701     PetscScalar *vals;
702 
703     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
704     if (!rank) {
705       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
706     } else {
707       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
708     }
709     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
710     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
711     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
712     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
713 
714     /* Copy the matrix ... This isn't the most efficient means,
715        but it's quick for now */
716     A->insertmode = INSERT_VALUES;
717     row = mat->rmap.rstart; m = mdn->A->rmap.n;
718     for (i=0; i<m; i++) {
719       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
720       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
721       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
722       row++;
723     }
724 
725     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
727     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
728     if (!rank) {
729       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
730     }
731     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
732     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
733     ierr = MatDestroy(A);CHKERRQ(ierr);
734   }
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatView_MPIDense"
740 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
741 {
742   PetscErrorCode ierr;
743   PetscTruth     iascii,isbinary,isdraw,issocket;
744 
745   PetscFunctionBegin;
746 
747   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
748   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
749   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
750   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
751 
752   if (iascii || issocket || isdraw) {
753     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
754   } else if (isbinary) {
755     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
756   } else {
757     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatGetInfo_MPIDense"
764 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
765 {
766   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
767   Mat            mdn = mat->A;
768   PetscErrorCode ierr;
769   PetscReal      isend[5],irecv[5];
770 
771   PetscFunctionBegin;
772   info->rows_global    = (double)A->rmap.N;
773   info->columns_global = (double)A->cmap.N;
774   info->rows_local     = (double)A->rmap.n;
775   info->columns_local  = (double)A->cmap.N;
776   info->block_size     = 1.0;
777   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
778   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
779   isend[3] = info->memory;  isend[4] = info->mallocs;
780   if (flag == MAT_LOCAL) {
781     info->nz_used      = isend[0];
782     info->nz_allocated = isend[1];
783     info->nz_unneeded  = isend[2];
784     info->memory       = isend[3];
785     info->mallocs      = isend[4];
786   } else if (flag == MAT_GLOBAL_MAX) {
787     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
788     info->nz_used      = irecv[0];
789     info->nz_allocated = irecv[1];
790     info->nz_unneeded  = irecv[2];
791     info->memory       = irecv[3];
792     info->mallocs      = irecv[4];
793   } else if (flag == MAT_GLOBAL_SUM) {
794     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
795     info->nz_used      = irecv[0];
796     info->nz_allocated = irecv[1];
797     info->nz_unneeded  = irecv[2];
798     info->memory       = irecv[3];
799     info->mallocs      = irecv[4];
800   }
801   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
802   info->fill_ratio_needed = 0;
803   info->factor_mallocs    = 0;
804   PetscFunctionReturn(0);
805 }
806 
807 #undef __FUNCT__
808 #define __FUNCT__ "MatSetOption_MPIDense"
809 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
810 {
811   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
812   PetscErrorCode ierr;
813 
814   PetscFunctionBegin;
815   switch (op) {
816   case MAT_NEW_NONZERO_LOCATIONS:
817   case MAT_NEW_NONZERO_LOCATION_ERR:
818   case MAT_NEW_NONZERO_ALLOCATION_ERR:
819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
820     break;
821   case MAT_ROW_ORIENTED:
822     a->roworiented = flg;
823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
824     break;
825   case MAT_NEW_DIAGONALS:
826   case MAT_USE_HASH_TABLE:
827     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
828     break;
829   case MAT_IGNORE_OFF_PROC_ENTRIES:
830     a->donotstash = flg;
831     break;
832   case MAT_SYMMETRIC:
833   case MAT_STRUCTURALLY_SYMMETRIC:
834   case MAT_HERMITIAN:
835   case MAT_SYMMETRY_ETERNAL:
836   case MAT_IGNORE_LOWER_TRIANGULAR:
837     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
838     break;
839   default:
840     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
841   }
842   PetscFunctionReturn(0);
843 }
844 
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDiagonalScale_MPIDense"
848 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
849 {
850   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
851   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
852   PetscScalar    *l,*r,x,*v;
853   PetscErrorCode ierr;
854   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
855 
856   PetscFunctionBegin;
857   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
858   if (ll) {
859     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
860     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
861     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
862     for (i=0; i<m; i++) {
863       x = l[i];
864       v = mat->v + i;
865       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
866     }
867     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
868     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
869   }
870   if (rr) {
871     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
872     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
873     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
876     for (i=0; i<n; i++) {
877       x = r[i];
878       v = mat->v + i*m;
879       for (j=0; j<m; j++) { (*v++) *= x;}
880     }
881     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
882     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatNorm_MPIDense"
889 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
890 {
891   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
892   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
893   PetscErrorCode ierr;
894   PetscInt       i,j;
895   PetscReal      sum = 0.0;
896   PetscScalar    *v = mat->v;
897 
898   PetscFunctionBegin;
899   if (mdn->size == 1) {
900     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
901   } else {
902     if (type == NORM_FROBENIUS) {
903       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
904 #if defined(PETSC_USE_COMPLEX)
905         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
906 #else
907         sum += (*v)*(*v); v++;
908 #endif
909       }
910       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
911       *nrm = sqrt(*nrm);
912       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
913     } else if (type == NORM_1) {
914       PetscReal *tmp,*tmp2;
915       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
916       tmp2 = tmp + A->cmap.N;
917       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
918       *nrm = 0.0;
919       v = mat->v;
920       for (j=0; j<mdn->A->cmap.n; j++) {
921         for (i=0; i<mdn->A->rmap.n; i++) {
922           tmp[j] += PetscAbsScalar(*v);  v++;
923         }
924       }
925       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
926       for (j=0; j<A->cmap.N; j++) {
927         if (tmp2[j] > *nrm) *nrm = tmp2[j];
928       }
929       ierr = PetscFree(tmp);CHKERRQ(ierr);
930       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
931     } else if (type == NORM_INFINITY) { /* max row norm */
932       PetscReal ntemp;
933       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
934       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
935     } else {
936       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
937     }
938   }
939   PetscFunctionReturn(0);
940 }
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "MatTranspose_MPIDense"
944 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
945 {
946   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
947   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
948   Mat            B;
949   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
950   PetscErrorCode ierr;
951   PetscInt       j,i;
952   PetscScalar    *v;
953 
954   PetscFunctionBegin;
955   if (A == *matout && M != N) {
956     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
957   }
958   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
959     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
960     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
961     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
962     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
963   } else {
964     B = *matout;
965   }
966 
967   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
968   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
969   for (i=0; i<m; i++) rwork[i] = rstart + i;
970   for (j=0; j<n; j++) {
971     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
972     v   += m;
973   }
974   ierr = PetscFree(rwork);CHKERRQ(ierr);
975   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977   if (*matout != A) {
978     *matout = B;
979   } else {
980     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
981   }
982   PetscFunctionReturn(0);
983 }
984 
985 #include "petscblaslapack.h"
986 #undef __FUNCT__
987 #define __FUNCT__ "MatScale_MPIDense"
988 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
989 {
990   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
991   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
992   PetscScalar    oalpha = alpha;
993   PetscErrorCode ierr;
994   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
995 
996   PetscFunctionBegin;
997   BLASscal_(&nz,&oalpha,a->v,&one);
998   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1006 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1007 {
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #if defined(PETSC_HAVE_PLAPACK)
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatMPIDenseCreatePlapack"
1019 PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
1020 {
1021   Mat_Plapack    *lu;
1022   PetscErrorCode ierr;
1023   PetscInt       M=A->rmap.N;
1024   MPI_Comm       comm=((PetscObject)A)->comm;
1025   PetscMPIInt    size;
1026 
1027   PetscFunctionBegin;
1028   if (A->spptr) PetscFunctionReturn(0);
1029   ierr = PetscNewLog(A,Mat_Plapack,&lu);CHKERRQ(ierr);
1030   A->spptr = (void*)lu;
1031 
1032   lu = (Mat_Plapack*)(A->spptr);
1033 
1034   /* Set default Plapack parameters */
1035   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1036   lu->nb     = M/size;
1037   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1038 
1039   /* Set runtime options */
1040   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1041     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1042   PetscOptionsEnd();
1043 
1044   /* Create object distribution template */
1045   lu->templ = NULL;
1046   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1047 
1048   /* Set the datatype */
1049 #if defined(PETSC_USE_COMPLEX)
1050   lu->datatype = MPI_DOUBLE_COMPLEX;
1051 #else
1052   lu->datatype = MPI_DOUBLE;
1053 #endif
1054 
1055   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap.N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1056 
1057 
1058   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1064 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1065 {
1066   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1067   PetscErrorCode ierr;
1068   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1069   PetscScalar    *array;
1070   PetscReal      one = 1.0;
1071 
1072   PetscFunctionBegin;
1073   /* Copy A into F->lu->A */
1074   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1075   ierr = PLA_API_begin();CHKERRQ(ierr);
1076   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1077   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1078   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1079   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1080   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1081   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1082   ierr = PLA_API_end();CHKERRQ(ierr);
1083   lu->rstart = rstart;
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1089 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1090 {
1091   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1092   PetscErrorCode ierr;
1093   PetscInt       M=A->cmap.N,m=A->rmap.n,rstart;
1094   PetscScalar    *array;
1095   PetscReal      one = 1.0;
1096 
1097   PetscFunctionBegin;
1098   /* Copy F's PLAPACK matrix (lu->A) back into the PETSc matrix A */
1099   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1100   ierr = PLA_API_begin();CHKERRQ(ierr);
1101   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1102   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1103   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1104   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1105   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1106   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1107   ierr = PLA_API_end();CHKERRQ(ierr);
1108   lu->rstart = rstart;
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1114 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1115 {
1116   PetscErrorCode ierr;
1117   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1118   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1119   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1120   PLA_Obj        alpha = NULL,beta = NULL;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1124   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1125 
1126   /* do the multiply in PLA  */
1127   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1128   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1129   CHKMEMQ;
1130 
1131   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A);CHKERRQ(ierr);
1132   CHKMEMQ;
1133   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1134   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1135 
1136   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1142 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1143 {
1144   PetscErrorCode ierr;
1145   PetscInt       m=A->rmap.n,n=B->cmap.n;
1146   Mat            Cmat;
1147 
1148   PetscFunctionBegin;
1149   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1150   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1151   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1152   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1153   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1154   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1155   ierr = MatMPIDenseCreatePlapack(A);CHKERRQ(ierr);
1156   ierr = MatMPIDenseCreatePlapack(B);CHKERRQ(ierr);
1157   ierr = MatMPIDenseCreatePlapack(Cmat);CHKERRQ(ierr);
1158   *C = Cmat;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1164 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1165 {
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   if (scall == MAT_INITIAL_MATRIX){
1170     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1171   }
1172   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatSolve_MPIDense"
1179 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1180 {
1181   MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
1182   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1183   PetscErrorCode ierr;
1184   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1185   PetscScalar    *array;
1186   PetscReal      one = 1.0;
1187   PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
1188   PLA_Obj        v_pla = NULL;
1189   PetscScalar    *loc_buf;
1190   Vec            loc_x;
1191 
1192   PetscFunctionBegin;
1193   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1194 
1195   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1196   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1197   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1198 
1199   /* Copy b into the PLAPACK vector v_pla */
1200   ierr = PLA_API_begin();CHKERRQ(ierr);
1201   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1202   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1203   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1204   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1205   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1206   ierr = PLA_API_end();CHKERRQ(ierr);
1207 
1208   if (A->factor == FACTOR_LU){
1209     /* Apply the permutations to the right hand sides */
1210     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1211 
1212     /* Solve L y = b, overwriting b with y */
1213     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1214 
1215     /* Solve U x = y (=b), overwriting b with x */
1216     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1217   } else { /* FACTOR_CHOLESKY */
1218     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1219     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1220   }
1221 
1222   /* Copy PLAPACK x into Petsc vector x  */
1223   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1224   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1225   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1226   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1227   if (!lu->pla_solved){
1228 
1229     ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
1230     ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
1231     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1232 
1233     /* Create index sets and the scatter context used for the VecScatter below */
1234     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1235     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1236     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1237     idx_petsc = idx_pla + loc_m;
1238 
1239     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1240     for (i=0; i<loc_m; i+=lu->nb){
1241       j = 0;
1242       while (j < lu->nb && i+j < loc_m){
1243         idx_petsc[i+j] = rstart + j; j++;
1244       }
1245       rstart += size*lu->nb;
1246     }
1247 
1248     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1249 
1250     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1251     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1252     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1253     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1254   }
1255   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257 
1258   /* Free data */
1259   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1260   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1261 
1262   lu->pla_solved = PETSC_TRUE;
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1268 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1269 {
1270   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1271   PetscErrorCode ierr;
1272   PetscInt       info_pla;
1273 
1274   PetscFunctionBegin;
1275   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1276 
1277   /* Copy A into F->lu->A */
1278   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1279 
1280   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1281   info_pla = PLA_LU(lu->A,lu->pivots);
1282   if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1283 
1284   (*F)->assembled    = PETSC_TRUE;
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1290 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1291 {
1292   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1293   PetscErrorCode ierr;
1294   PetscInt       info_pla;
1295 
1296   PetscFunctionBegin;
1297 
1298   /* Copy A into F->lu->A */
1299   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1300 
1301   /* Cholesky factor A -> L L^T, overwriting the lower triangle of A with L */
1302   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1303   if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1304 
1305   (*F)->assembled = PETSC_TRUE;
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1312 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1313 {
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   /* Create the factorization matrix */
1318   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1319   ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1320   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1321   ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 /* Note the PETSc r and c permutations are ignored */
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1328 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1329 {
1330   PetscErrorCode ierr;
1331   PetscInt       M = A->rmap.N;
1332   Mat_Plapack    *lu;
1333 
1334   PetscFunctionBegin;
1335   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1336   lu = (Mat_Plapack*)(*F)->spptr;
1337   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1338   (*F)->factor = FACTOR_LU;
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 /* Note the PETSc perm permutation is ignored */
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1345 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1346 {
1347   PetscErrorCode ierr;
1348   PetscTruth     issymmetric,set;
1349 
1350   PetscFunctionBegin;
1351   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1352   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1353   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1354   (*F)->factor = FACTOR_CHOLESKY;
1355   PetscFunctionReturn(0);
1356 }
1357 #endif
1358 
1359 /* -------------------------------------------------------------------*/
1360 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1361        MatGetRow_MPIDense,
1362        MatRestoreRow_MPIDense,
1363        MatMult_MPIDense,
1364 /* 4*/ MatMultAdd_MPIDense,
1365        MatMultTranspose_MPIDense,
1366        MatMultTransposeAdd_MPIDense,
1367 #if defined(PETSC_HAVE_PLAPACK)
1368        MatSolve_MPIDense,
1369 #else
1370        0,
1371 #endif
1372        0,
1373        0,
1374 /*10*/ 0,
1375        0,
1376        0,
1377        0,
1378        MatTranspose_MPIDense,
1379 /*15*/ MatGetInfo_MPIDense,
1380        MatEqual_MPIDense,
1381        MatGetDiagonal_MPIDense,
1382        MatDiagonalScale_MPIDense,
1383        MatNorm_MPIDense,
1384 /*20*/ MatAssemblyBegin_MPIDense,
1385        MatAssemblyEnd_MPIDense,
1386        0,
1387        MatSetOption_MPIDense,
1388        MatZeroEntries_MPIDense,
1389 /*25*/ MatZeroRows_MPIDense,
1390 #if defined(PETSC_HAVE_PLAPACK)
1391        MatLUFactorSymbolic_MPIDense,
1392        MatLUFactorNumeric_MPIDense,
1393        MatCholeskyFactorSymbolic_MPIDense,
1394        MatCholeskyFactorNumeric_MPIDense,
1395 #else
1396        0,
1397        0,
1398        0,
1399        0,
1400 #endif
1401 /*30*/ MatSetUpPreallocation_MPIDense,
1402        0,
1403        0,
1404        MatGetArray_MPIDense,
1405        MatRestoreArray_MPIDense,
1406 /*35*/ MatDuplicate_MPIDense,
1407        0,
1408        0,
1409        0,
1410        0,
1411 /*40*/ 0,
1412        MatGetSubMatrices_MPIDense,
1413        0,
1414        MatGetValues_MPIDense,
1415        0,
1416 /*45*/ 0,
1417        MatScale_MPIDense,
1418        0,
1419        0,
1420        0,
1421 /*50*/ 0,
1422        0,
1423        0,
1424        0,
1425        0,
1426 /*55*/ 0,
1427        0,
1428        0,
1429        0,
1430        0,
1431 /*60*/ MatGetSubMatrix_MPIDense,
1432        MatDestroy_MPIDense,
1433        MatView_MPIDense,
1434        0,
1435        0,
1436 /*65*/ 0,
1437        0,
1438        0,
1439        0,
1440        0,
1441 /*70*/ 0,
1442        0,
1443        0,
1444        0,
1445        0,
1446 /*75*/ 0,
1447        0,
1448        0,
1449        0,
1450        0,
1451 /*80*/ 0,
1452        0,
1453        0,
1454        0,
1455 /*84*/ MatLoad_MPIDense,
1456        0,
1457        0,
1458        0,
1459        0,
1460        0,
1461 /*90*/
1462 #if defined(PETSC_HAVE_PLAPACK)
1463        MatMatMult_MPIDense_MPIDense,
1464        MatMatMultSymbolic_MPIDense_MPIDense,
1465        MatMatMultNumeric_MPIDense_MPIDense,
1466 #else
1467        0,
1468        0,
1469        0,
1470 #endif
1471        0,
1472 /*95*/ 0,
1473        0,
1474        0,
1475        0};
1476 
1477 EXTERN_C_BEGIN
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1480 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1481 {
1482   Mat_MPIDense   *a;
1483   PetscErrorCode ierr;
1484 
1485   PetscFunctionBegin;
1486   mat->preallocated = PETSC_TRUE;
1487   /* Note: if the caller supplies data, we assume it points to correctly allocated
1488    local dense storage space; error checking for this should still be added. */
1489 
1490   a    = (Mat_MPIDense*)mat->data;
1491   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1492   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1493   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1494   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1495   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 EXTERN_C_END
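/*
   Storage sketch (an illustration, not code from this file): after the preallocation
   above, each process owns an inner MATSEQDENSE matrix of size mat->rmap.n by
   mat->cmap.N, i.e. its local rows against all global columns, stored column major,
   so local entry (i,j) lives at

     array[i + j*mat->rmap.n]

   which is exactly the indexing used by MatLoad_MPIDense_DenseInFile() further below.
*/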
1499 
1500 EXTERN_C_BEGIN
1501 #if defined(PETSC_HAVE_PLAPACK)
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1504 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1505 {
1506   PetscFunctionBegin;
1507   /* dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") works silently (since PLAPACK is on by default) */
1508   PetscFunctionReturn(0);
1509 }
1510 #endif
1511 EXTERN_C_END
1512 
1513 /*MC
1514    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1515 
1516    Options Database Keys:
1517 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1518 
1519   Level: beginner
1520 
1521   MATMPIDENSE matrices may use the parallel direct solvers (LU and Cholesky)
1522   provided by the external package PLAPACK, if PLAPACK is installed
1523   (run config/configure.py with the option --download-plapack).
1524 
1525 
1526   Options Database Keys:
1527 + -mat_plapack_nprows <n> - number of rows in processor partition
1528 . -mat_plapack_npcols <n> - number of columns in processor partition
1529 . -mat_plapack_nb <n> - block size of template vector
1530 . -mat_plapack_nb_alg <n> - algorithmic block size
1531 - -mat_plapack_ckerror <n> - error checking flag
1532 
1533 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1534 M*/
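/*
   A hypothetical command line (assuming PETSc was configured with --download-plapack
   and the program calls MatSetFromOptions()) that selects this type and a 2 x 2
   PLAPACK process mesh at run time; ./ex1 is a placeholder program name:

     mpiexec -n 4 ./ex1 -mat_type mpidense -mat_plapack_nprows 2 -mat_plapack_npcols 2
*/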
1535 
1536 EXTERN_C_BEGIN
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatCreate_MPIDense"
1539 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1540 {
1541   Mat_MPIDense   *a;
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1546   mat->data         = (void*)a;
1547   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1548   mat->factor       = 0;
1549   mat->mapping      = 0;
1550 
1551   mat->insertmode = NOT_SET_VALUES;
1552   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1553   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1554 
1555   mat->rmap.bs = mat->cmap.bs = 1;
1556   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1557   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1558   a->nvec = mat->cmap.n;
1559 
1560   /* build cache for off array entries formed */
1561   a->donotstash = PETSC_FALSE;
1562   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1563 
1564   /* stuff used for matrix vector multiply */
1565   a->lvec        = 0;
1566   a->Mvctx       = 0;
1567   a->roworiented = PETSC_TRUE;
1568 
1569   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1570                                      "MatGetDiagonalBlock_MPIDense",
1571                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1572   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1573                                      "MatMPIDenseSetPreallocation_MPIDense",
1574                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1575   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1576                                      "MatMatMult_MPIAIJ_MPIDense",
1577                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1578   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1579                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1580                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1581   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1582                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1583                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1584 #if defined(PETSC_HAVE_PLAPACK)
1585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1586                                      "MatSetSolverType_MPIDense_PLAPACK",
1587                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1588 #endif
1589   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1590 
1591   PetscFunctionReturn(0);
1592 }
1593 EXTERN_C_END
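/*
   A minimal creation sketch that reaches the constructor above through the type
   system; comm and the sizes m, n, M, N are assumed to be defined by the caller:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/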
1594 
1595 /*MC
1596    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1597 
1598    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1599    and MATMPIDENSE otherwise.
1600 
1601    Options Database Keys:
1602 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1603 
1604   Level: beginner
1605 
1606 
1607 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1608 M*/
1609 
1610 EXTERN_C_BEGIN
1611 #undef __FUNCT__
1612 #define __FUNCT__ "MatCreate_Dense"
1613 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1614 {
1615   PetscErrorCode ierr;
1616   PetscMPIInt    size;
1617 
1618   PetscFunctionBegin;
1619   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1620   if (size == 1) {
1621     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1622   } else {
1623     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 EXTERN_C_END
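/*
   For example (a sketch, not code from this file), the single call

     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);

   produces a MATSEQDENSE matrix when A lives on one process and a MATMPIDENSE
   matrix otherwise, because the routine above simply forwards to the matching
   constructor based on the communicator size.
*/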
1628 
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1631 /*@C
1632    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1633 
1634    Not collective
1635 
1636    Input Parameters:
1637 +  A - the matrix
1638 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1639    to control all matrix memory allocation.
1640 
1641    Notes:
1642    The dense format is fully compatible with standard Fortran 77
1643    storage by columns.
1644 
1645    The data input variable is intended primarily for Fortran programmers
1646    who wish to allocate their own matrix memory space.  Most users should
1647    set data=PETSC_NULL.
1648 
1649    Level: intermediate
1650 
1651 .keywords: matrix,dense, parallel
1652 
1653 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1654 @*/
1655 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1656 {
1657   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1658 
1659   PetscFunctionBegin;
1660   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1661   if (f) {
1662     ierr = (*f)(mat,data);CHKERRQ(ierr);
1663   }
1664   PetscFunctionReturn(0);
1665 }
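/*
   A sketch of the user-supplied-storage case described in the manual page above
   (mydata, m and N are hypothetical names): the caller provides at least m*N
   scalars in column-major order, where m is the number of local rows and N the
   number of global columns, and remains responsible for eventually freeing the array:

     PetscScalar *mydata;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/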
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatCreateMPIDense"
1669 /*@C
1670    MatCreateMPIDense - Creates a parallel matrix in dense format.
1671 
1672    Collective on MPI_Comm
1673 
1674    Input Parameters:
1675 +  comm - MPI communicator
1676 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1677 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1678 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1679 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1680 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1681    to control all matrix memory allocation.
1682 
1683    Output Parameter:
1684 .  A - the matrix
1685 
1686    Notes:
1687    The dense format is fully compatible with standard Fortran 77
1688    storage by columns.
1689 
1690    The data input variable is intended primarily for Fortran programmers
1691    who wish to allocate their own matrix memory space.  Most users should
1692    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1693 
1694    The user MUST specify either the local or global matrix dimensions
1695    (possibly both).
1696 
1697    Level: intermediate
1698 
1699 .keywords: matrix,dense, parallel
1700 
1701 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1702 @*/
1703 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1704 {
1705   PetscErrorCode ierr;
1706   PetscMPIInt    size;
1707 
1708   PetscFunctionBegin;
1709   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1710   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1711   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1712   if (size > 1) {
1713     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1714     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1715   } else {
1716     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1717     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1718   }
1719   PetscFunctionReturn(0);
1720 }
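/*
   A usage sketch for the convenience routine above; the 100 x 50 global size is
   illustrative only:

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);CHKERRQ(ierr);
     (set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd())
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/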
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatDuplicate_MPIDense"
1724 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1725 {
1726   Mat            mat;
1727   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   *newmat       = 0;
1732   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1733   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1734   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1735   a                 = (Mat_MPIDense*)mat->data;
1736   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1737   mat->factor       = A->factor;
1738   mat->assembled    = PETSC_TRUE;
1739   mat->preallocated = PETSC_TRUE;
1740 
1741   mat->rmap.rstart     = A->rmap.rstart;
1742   mat->rmap.rend       = A->rmap.rend;
1743   a->size              = oldmat->size;
1744   a->rank              = oldmat->rank;
1745   mat->insertmode      = NOT_SET_VALUES;
1746   a->nvec              = oldmat->nvec;
1747   a->donotstash        = oldmat->donotstash;
1748 
1749   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1750   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1751   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1752 
1753   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1754   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1755   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1756 
1757 #if defined(PETSC_HAVE_PLAPACK)
1758   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1759 #endif
1760   *newmat = mat;
1761   PetscFunctionReturn(0);
1762 }
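/*
   Typical call reaching the routine above (a sketch): create an independent copy
   that shares the layout of A and, with MAT_COPY_VALUES, also its entries:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/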
1763 
1764 #include "petscsys.h"
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1768 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1769 {
1770   PetscErrorCode ierr;
1771   PetscMPIInt    rank,size;
1772   PetscInt       *rowners,i,m,nz,j;
1773   PetscScalar    *array,*vals,*vals_ptr;
1774   MPI_Status     status;
1775 
1776   PetscFunctionBegin;
1777   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1778   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1779 
1780   /* determine ownership of all rows */
1781   m          = M/size + ((M % size) > rank);
1782   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1783   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1784   rowners[0] = 0;
1785   for (i=2; i<=size; i++) {
1786     rowners[i] += rowners[i-1];
1787   }
1788 
1789   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1790   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1791   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1792   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1793   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1794 
1795   if (!rank) {
1796     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1797 
1798     /* read in my part of the matrix numerical values  */
1799     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1800 
1801     /* insert into the matrix row by row (this is why we cannot read the file directly into array) */
1802     vals_ptr = vals;
1803     for (i=0; i<m; i++) {
1804       for (j=0; j<N; j++) {
1805         array[i + j*m] = *vals_ptr++;
1806       }
1807     }
1808 
1809     /* read in the other processors' parts and ship them out */
1810     for (i=1; i<size; i++) {
1811       nz   = (rowners[i+1] - rowners[i])*N;
1812       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1813       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1814     }
1815   } else {
1816     /* receive numeric values */
1817     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1818 
1819     /* receive message of values */
1820     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1821 
1822     /* insert into the matrix row by row (this is why we cannot receive directly into array) */
1823     vals_ptr = vals;
1824     for (i=0; i<m; i++) {
1825       for (j=0; j<N; j++) {
1826         array[i + j*m] = *vals_ptr++;
1827       }
1828     }
1829   }
1830   ierr = PetscFree(rowners);CHKERRQ(ierr);
1831   ierr = PetscFree(vals);CHKERRQ(ierr);
1832   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1833   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1834   PetscFunctionReturn(0);
1835 }
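/*
   Why the transposition loops above are needed: the binary file stores each local
   block row by row, while the MATSEQDENSE array is column major.  For a local block
   with m = 2 rows and N = 3 columns, the file order

     a00 a01 a02 a10 a11 a12

   is scattered as array[0]=a00, array[2]=a01, array[4]=a02, array[1]=a10,
   array[3]=a11, array[5]=a12, since entry (i,j) is stored at array[i + j*m].
*/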
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatLoad_MPIDense"
1839 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1840 {
1841   Mat            A;
1842   PetscScalar    *vals,*svals;
1843   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1844   MPI_Status     status;
1845   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1846   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1847   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1848   PetscInt       i,nz,j,rstart,rend;
1849   int            fd;
1850   PetscErrorCode ierr;
1851 
1852   PetscFunctionBegin;
1853   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1854   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1855   if (!rank) {
1856     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1857     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1858     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1859   }
1860 
1861   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1862   M = header[1]; N = header[2]; nz = header[3];
1863 
1864   /*
1865        Handle case where matrix is stored on disk as a dense matrix
1866   */
1867   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1868     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1869     PetscFunctionReturn(0);
1870   }
1871 
1872   /* determine ownership of all rows */
1873   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1874   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1875   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1876   rowners[0] = 0;
1877   for (i=2; i<=size; i++) {
1878     rowners[i] += rowners[i-1];
1879   }
1880   rstart = rowners[rank];
1881   rend   = rowners[rank+1];
1882 
1883   /* distribute row lengths to all processors */
1884   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1885   offlens = ourlens + (rend-rstart);
1886   if (!rank) {
1887     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1888     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1889     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1890     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1891     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1892     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1893   } else {
1894     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1895   }
1896 
1897   if (!rank) {
1898     /* calculate the number of nonzeros on each processor */
1899     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1900     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1901     for (i=0; i<size; i++) {
1902       for (j=rowners[i]; j< rowners[i+1]; j++) {
1903         procsnz[i] += rowlengths[j];
1904       }
1905     }
1906     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1907 
1908     /* determine max buffer needed and allocate it */
1909     maxnz = 0;
1910     for (i=0; i<size; i++) {
1911       maxnz = PetscMax(maxnz,procsnz[i]);
1912     }
1913     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1914 
1915     /* read in my part of the matrix column indices  */
1916     nz = procsnz[0];
1917     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1918     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1919 
1920     /* read in everyone else's part and ship it off */
1921     for (i=1; i<size; i++) {
1922       nz   = procsnz[i];
1923       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1924       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1925     }
1926     ierr = PetscFree(cols);CHKERRQ(ierr);
1927   } else {
1928     /* determine buffer space needed for message */
1929     nz = 0;
1930     for (i=0; i<m; i++) {
1931       nz += ourlens[i];
1932     }
1933     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1934 
1935     /* receive message of column indices*/
1936     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1937     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1938     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1939   }
1940 
1941   /* loop over local rows, determining the number of off-diagonal entries */
1942   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1943   jj = 0;
1944   for (i=0; i<m; i++) {
1945     for (j=0; j<ourlens[i]; j++) {
1946       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1947       jj++;
1948     }
1949   }
1950 
1951   /* create our matrix */
1952   for (i=0; i<m; i++) {
1953     ourlens[i] -= offlens[i];
1954   }
1955   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1956   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1957   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1958   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1959   A = *newmat;
1960   for (i=0; i<m; i++) {
1961     ourlens[i] += offlens[i];
1962   }
1963 
1964   if (!rank) {
1965     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1966 
1967     /* read in my part of the matrix numerical values  */
1968     nz = procsnz[0];
1969     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1970 
1971     /* insert into matrix */
1972     jj      = rstart;
1973     smycols = mycols;
1974     svals   = vals;
1975     for (i=0; i<m; i++) {
1976       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1977       smycols += ourlens[i];
1978       svals   += ourlens[i];
1979       jj++;
1980     }
1981 
1982     /* read in the other processors' parts and ship them out */
1983     for (i=1; i<size; i++) {
1984       nz   = procsnz[i];
1985       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1986       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1987     }
1988     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1989   } else {
1990     /* receive numeric values */
1991     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1992 
1993     /* receive message of values */
1994     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1995     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1996     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1997 
1998     /* insert into matrix */
1999     jj      = rstart;
2000     smycols = mycols;
2001     svals   = vals;
2002     for (i=0; i<m; i++) {
2003       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2004       smycols += ourlens[i];
2005       svals   += ourlens[i];
2006       jj++;
2007     }
2008   }
2009   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2010   ierr = PetscFree(vals);CHKERRQ(ierr);
2011   ierr = PetscFree(mycols);CHKERRQ(ierr);
2012   ierr = PetscFree(rowners);CHKERRQ(ierr);
2013 
2014   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2015   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2016   PetscFunctionReturn(0);
2017 }
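/*
   A loading sketch for the routine above (the file name matrix.dat is a placeholder):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/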
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatEqual_MPIDense"
2021 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2022 {
2023   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2024   Mat            a,b;
2025   PetscTruth     flg;
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   a = matA->A;
2030   b = matB->A;
2031   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2032   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 #if defined(PETSC_HAVE_PLAPACK)
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2040 /*@C
2041   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2042   Level: developer
2043 
2044 .keywords: Petsc, destroy, package, PLAPACK
2045 .seealso: PetscFinalize()
2046 @*/
2047 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2048 {
2049   PetscErrorCode ierr;
2050 
2051   PetscFunctionBegin;
2052   ierr = PLA_Finalize();CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2058 /*@C
2059   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2060   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
2061   when using static libraries.
2062 
2063   Input Parameter:
2064 . path - The dynamic library path, or PETSC_NULL
2065 
2066   Level: developer
2067 
2068 .keywords: Petsc, initialize, package, PLAPACK
2069 .seealso: PetscInitializePackage(), PetscInitialize()
2070 @*/
2071 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
2072 {
2073   MPI_Comm       comm = PETSC_COMM_WORLD;
2074   PetscMPIInt    size;
2075   PetscErrorCode ierr;
2076 
2077   PetscFunctionBegin;
2078   if (!PLA_Initialized(PETSC_NULL)) {
2079 
2080     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2081     Plapack_nprows = 1;
2082     Plapack_npcols = size;
2083 
2084     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2085       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2086       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2087 #if defined(PETSC_USE_DEBUG)
2088       Plapack_ierror = 3;
2089 #else
2090       Plapack_ierror = 0;
2091 #endif
2092       ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2093       if (Plapack_ierror){
2094 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2095       } else {
2096 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2097       }
2098 
2099       Plapack_nb_alg = 0;
2100       ierr = PetscOptionsInt("-plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2101       if (Plapack_nb_alg) {
2102         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2103       }
2104     PetscOptionsEnd();
2105 
2106     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2107     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2108     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2109   }
2110   PetscFunctionReturn(0);
2111 }
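/*
   The options parsed above are read once, at the first PLAPACK initialization.
   A hypothetical run selecting a 2 x 3 process mesh, an algorithmic block size of
   128, and error checking enabled (3 is the debug default set above); ./ex1 is a
   placeholder program name:

     mpiexec -n 6 ./ex1 -plapack_nprows 2 -plapack_npcols 3 -plapack_nb_alg 128 -plapack_ckerror 3
*/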
2112 
2113 
2114 #endif
2115