xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision eae6fb2ecf7051c084a928db0940ac509bd7cf3e)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 #endif
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatDenseGetLocalMatrix"
16 /*@
17    MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the
18       sequential matrix that stores the locally owned entries; for a sequential
19       dense matrix it returns the matrix itself.
20 
21    Input Parameter:
22 .   A - the MATSEQDENSE or MATMPIDENSE matrix
23 
24    Output Parameter:
25 .   B - the inner (local) dense matrix
26 
27    Level: intermediate
28 
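   Example usage (a minimal sketch; error checking omitted):
.vb
      Mat Aloc;
      MatDenseGetLocalMatrix(A,&Aloc);
      /* Aloc references the matrix's own storage; the caller must not destroy it */
.ve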
29 @*/
30 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
31 {
32   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
33   PetscErrorCode ierr;
34   PetscTruth     flg;
35 
36   PetscFunctionBegin;
37   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
38   if (flg) {
39     *B = mat->A;
40   } else {
41     *B = A;
42   }
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "MatGetRow_MPIDense"
48 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
49 {
50   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
51   PetscErrorCode ierr;
52   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
53 
54   PetscFunctionBegin;
55   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows supported");
56   lrow = row - rstart;
57   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRestoreRow_MPIDense"
63 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
69   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
70   PetscFunctionReturn(0);
71 }
72 
73 EXTERN_C_BEGIN
74 #undef __FUNCT__
75 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
76 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscErrorCode ierr;
80   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
81   PetscScalar    *array;
82   MPI_Comm       comm;
83 
84   PetscFunctionBegin;
85   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
86 
87   /* The reuse aspect is not implemented efficiently */
88   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
89 
90   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
91   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
92   ierr = MatCreate(comm,B);CHKERRQ(ierr);
93   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
94   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
95   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
96   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
97   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
98   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
99 
100   *iscopy = PETSC_TRUE;
101   PetscFunctionReturn(0);
102 }
103 EXTERN_C_END
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIDense"
107 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
110   PetscErrorCode ierr;
111   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
112   PetscTruth     roworiented = A->roworiented;
113 
114   PetscFunctionBegin;
115   for (i=0; i<m; i++) {
116     if (idxm[i] < 0) continue;
117     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
118     if (idxm[i] >= rstart && idxm[i] < rend) {
119       row = idxm[i] - rstart;
120       if (roworiented) {
121         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
122       } else {
123         for (j=0; j<n; j++) {
124           if (idxn[j] < 0) continue;
125           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
126           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
127         }
128       }
129     } else {
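      /* off-process row: cache the values in the matrix stash; they are shipped to
         the owning process during MatAssemblyBegin()/MatAssemblyEnd() */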
130       if (!A->donotstash) {
131         if (roworiented) {
132           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
133         } else {
134           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
135         }
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatGetValues_MPIDense"
144 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
145 {
146   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
147   PetscErrorCode ierr;
148   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
149 
150   PetscFunctionBegin;
151   for (i=0; i<m; i++) {
152     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
153     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
154     if (idxm[i] >= rstart && idxm[i] < rend) {
155       row = idxm[i] - rstart;
156       for (j=0; j<n; j++) {
157         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
158         if (idxn[j] >= mat->cmap.N) {
159           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
160         }
161         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
162       }
163     } else {
164       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
165     }
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "MatGetArray_MPIDense"
172 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 #undef __FUNCT__
183 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
184 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
185 {
186   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
187   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
188   PetscErrorCode ierr;
189   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
190   PetscScalar    *av,*bv,*v = lmat->v;
191   Mat            newmat;
192 
193   PetscFunctionBegin;
194   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
195   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
196   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
197   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
198 
199   /* No parallel redistribution is currently supported! We should really check each
200      index set to confirm that it is OK. Currently only a submatrix with the same row
201      partitioning as the original matrix is supported. */
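  /* For example, on each process isrow may contain only global rows in the local range
     [rstart,rend): the copy below indexes av[irow[j] - rstart] and would read out of
     bounds for a row owned by another process. */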
202 
203   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
204   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
205 
206   /* Check submatrix call */
207   if (scall == MAT_REUSE_MATRIX) {
208     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
209     /* Really need to test rows and column sizes! */
210     newmat = *B;
211   } else {
212     /* Create and fill new matrix */
213     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
214     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
215     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
216     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
217   }
218 
219   /* Now extract the data pointers and do the copy, column at a time */
220   newmatd = (Mat_MPIDense*)newmat->data;
221   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
222 
223   for (i=0; i<ncols; i++) {
224     av = v + nlrows*icol[i];
225     for (j=0; j<nrows; j++) {
226       *bv++ = av[irow[j] - rstart];
227     }
228   }
229 
230   /* Assemble the matrices so that the correct flags are set */
231   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233 
234   /* Free work space */
235   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
236   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
237   *B = newmat;
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatRestoreArray_MPIDense"
243 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
244 {
245   PetscFunctionBegin;
246   PetscFunctionReturn(0);
247 }
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
251 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
252 {
253   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
254   MPI_Comm       comm = ((PetscObject)mat)->comm;
255   PetscErrorCode ierr;
256   PetscInt       nstash,reallocs;
257   InsertMode     addv;
258 
259   PetscFunctionBegin;
260   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
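  /* With NOT_SET_VALUES == 0, INSERT_VALUES == 1, and ADD_VALUES == 2, the bitwise OR
     over all ranks equals (ADD_VALUES|INSERT_VALUES) exactly when some ranks inserted
     while others added; a rank that set no values simply adopts the common mode below. */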
261   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
262   if (addv == (ADD_VALUES|INSERT_VALUES)) {
263     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
264   }
265   mat->insertmode = addv; /* in case this processor had no cache */
266 
267   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
268   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
269   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
275 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
276 {
277   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
278   PetscErrorCode  ierr;
279   PetscInt        i,*row,*col,flg,j,rstart,ncols;
280   PetscMPIInt     n;
281   PetscScalar     *val;
282   InsertMode      addv=mat->insertmode;
283 
284   PetscFunctionBegin;
285   /*  wait on receives */
286   while (1) {
287     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
288     if (!flg) break;
289 
290     for (i=0; i<n;) {
291       /* Now identify the consecutive vals belonging to the same row */
292       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
293       if (j < n) ncols = j-i;
294       else       ncols = n-i;
295       /* Now assemble all these values with a single function call */
296       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
297       i = j;
298     }
299   }
300   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
301 
302   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
303   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
304 
305   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
306     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatZeroEntries_MPIDense"
313 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
314 {
315   PetscErrorCode ierr;
316   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
317 
318   PetscFunctionBegin;
319   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /* The code does not handle the diagonal entries correctly unless the
324    matrix is square and the column and row ownerships are identical.
325    This is a BUG. The only way to fix it seems to be to access
326    mdn->A and mdn->B directly rather than going through the MatZeroRows()
327    routine.
328 */
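/*
   Summary of the communication below:
     1) each process determines, from A->rmap.range, which process owns each global
        row it was handed and counts how many rows go to each owner;
     2) PetscMaxSum() tells every process how many messages it will receive and the
        maximum message length;
     3) the row indices are exchanged with nonblocking MPI_Irecv()/MPI_Isend(), after
        which each process zeros its now-local rows via MatZeroRows() on the
        sequential block l->A.
*/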
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroRows_MPIDense"
331 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
332 {
333   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
334   PetscErrorCode ierr;
335   PetscInt       i,*owners = A->rmap.range;
336   PetscInt       *nprocs,j,idx,nsends;
337   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
338   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
339   PetscInt       *lens,*lrows,*values;
340   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
341   MPI_Comm       comm = ((PetscObject)A)->comm;
342   MPI_Request    *send_waits,*recv_waits;
343   MPI_Status     recv_status,*send_status;
344   PetscTruth     found;
345 
346   PetscFunctionBegin;
347   /*  first count number of contributors to each processor */
348   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
349   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
351   for (i=0; i<N; i++) {
352     idx = rows[i];
353     found = PETSC_FALSE;
354     for (j=0; j<size; j++) {
355       if (idx >= owners[j] && idx < owners[j+1]) {
356         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
357       }
358     }
359     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
360   }
361   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
362 
363   /* inform other processors of the number of messages and the maximum length */
364   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
365 
366   /* post receives:   */
367   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
368   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
369   for (i=0; i<nrecvs; i++) {
370     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
371   }
372 
373   /* do sends:
374       1) starts[i] gives the starting index in svalues for the rows going to
375          the ith processor
376   */
377   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
378   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
379   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
380   starts[0]  = 0;
381   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
382   for (i=0; i<N; i++) {
383     svalues[starts[owner[i]]++] = rows[i];
384   }
385 
386   starts[0] = 0;
387   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
388   count = 0;
389   for (i=0; i<size; i++) {
390     if (nprocs[2*i+1]) {
391       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
392     }
393   }
394   ierr = PetscFree(starts);CHKERRQ(ierr);
395 
396   base = owners[rank];
397 
398   /*  wait on receives */
399   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
400   source = lens + nrecvs;
401   count  = nrecvs; slen = 0;
402   while (count) {
403     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
404     /* unpack receives into our local space */
405     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
406     source[imdex]  = recv_status.MPI_SOURCE;
407     lens[imdex]    = n;
408     slen += n;
409     count--;
410   }
411   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
412 
413   /* move the data into the send scatter */
414   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
415   count = 0;
416   for (i=0; i<nrecvs; i++) {
417     values = rvalues + i*nmax;
418     for (j=0; j<lens[i]; j++) {
419       lrows[count++] = values[j] - base;
420     }
421   }
422   ierr = PetscFree(rvalues);CHKERRQ(ierr);
423   ierr = PetscFree(lens);CHKERRQ(ierr);
424   ierr = PetscFree(owner);CHKERRQ(ierr);
425   ierr = PetscFree(nprocs);CHKERRQ(ierr);
426 
427   /* actually zap the local rows */
428   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
429   ierr = PetscFree(lrows);CHKERRQ(ierr);
430 
431   /* wait on sends */
432   if (nsends) {
433     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
434     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
435     ierr = PetscFree(send_status);CHKERRQ(ierr);
436   }
437   ierr = PetscFree(send_waits);CHKERRQ(ierr);
438   ierr = PetscFree(svalues);CHKERRQ(ierr);
439 
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatMult_MPIDense"
445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
446 {
447   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
452   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
453   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatMultAdd_MPIDense"
459 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
460 {
461   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
466   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
467   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
468   PetscFunctionReturn(0);
469 }
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "MatMultTranspose_MPIDense"
473 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
474 {
475   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
476   PetscErrorCode ierr;
477   PetscScalar    zero = 0.0;
478 
479   PetscFunctionBegin;
480   ierr = VecSet(yy,zero);CHKERRQ(ierr);
481   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
482   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
483   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
484   PetscFunctionReturn(0);
485 }
486 
487 #undef __FUNCT__
488 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
489 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
490 {
491   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
492   PetscErrorCode ierr;
493 
494   PetscFunctionBegin;
495   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
496   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
497   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
498   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatGetDiagonal_MPIDense"
504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
505 {
506   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
507   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
508   PetscErrorCode ierr;
509   PetscInt       len,i,n,m = A->rmap.n,radd;
510   PetscScalar    *x,zero = 0.0;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(v,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
515   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
516   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
517   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
518   radd = A->rmap.rstart*m;
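  /* aloc->v is column major with leading dimension m, so the diagonal entry of global
     row rstart+i lives in local column rstart+i at offset (rstart+i)*m + i, which is
     radd + i*m + i */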
519   for (i=0; i<len; i++) {
520     x[i] = aloc->v[radd + i*m + i];
521   }
522   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatDestroy_MPIDense"
528 PetscErrorCode MatDestroy_MPIDense(Mat mat)
529 {
530   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
531   PetscErrorCode ierr;
532 #if defined(PETSC_HAVE_PLAPACK)
533   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
534 #endif
535 
536   PetscFunctionBegin;
537 
538 #if defined(PETSC_USE_LOG)
539   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
540 #endif
541   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
542   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
543   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
544   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
545 #if defined(PETSC_HAVE_PLAPACK)
546   if (lu->CleanUpPlapack) {
547     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
548     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
549     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
550 
551 
552     ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
553     ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
554     ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
555   }
556 #endif
557 
558   ierr = PetscFree(mdn);CHKERRQ(ierr);
559   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
560   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
563   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
564   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
565   PetscFunctionReturn(0);
566 }
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatView_MPIDense_Binary"
570 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
571 {
572   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
573   PetscErrorCode    ierr;
574   PetscViewerFormat format;
575   int               fd;
576   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
577   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
578   PetscScalar       *work,*v,*vv;
579   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
580   MPI_Status        status;
581 
582   PetscFunctionBegin;
583   if (mdn->size == 1) {
584     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
585   } else {
586     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
587     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
588     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
589 
590     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
591     if (format == PETSC_VIEWER_BINARY_NATIVE) {
592 
593       if (!rank) {
594 	/* store the matrix as a dense matrix */
595 	header[0] = MAT_FILE_COOKIE;
596 	header[1] = mat->rmap.N;
597 	header[2] = N;
598 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
599 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
600 
601 	/* get largest work array needed for transposing array */
602         mmax = mat->rmap.n;
603         for (i=1; i<size; i++) {
604           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
605         }
606 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
607 
608 	/* write out local array, by rows */
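	/* a->v is column major with leading dimension m; work[j + i*N] receives entry
	   (i,j), i.e. the local block is transposed to row-major order so that whole
	   rows can be streamed to the file contiguously */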
609         m    = mat->rmap.n;
610 	v    = a->v;
611         for (j=0; j<N; j++) {
612           for (i=0; i<m; i++) {
613 	    work[j + i*N] = *v++;
614 	  }
615 	}
616 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
617         /* get largest work array to receive messages from other processes, excludes process zero */
618         mmax = 0;
619         for (i=1; i<size; i++) {
620           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
621         }
622 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
623         v = vv;
624         for (k=1; k<size; k++) {
625           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
626           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
627 
628           for (j=0; j<N; j++) {
629             for (i=0; i<m; i++) {
630               work[j + i*N] = *v++;
631 	    }
632 	  }
633 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
634         }
635         ierr = PetscFree(work);CHKERRQ(ierr);
636         ierr = PetscFree(vv);CHKERRQ(ierr);
637       } else {
638         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
639       }
640     }
641   }
642   PetscFunctionReturn(0);
643 }
644 
645 #undef __FUNCT__
646 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
647 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
648 {
649   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
650   PetscErrorCode    ierr;
651   PetscMPIInt       size = mdn->size,rank = mdn->rank;
652   PetscViewerType   vtype;
653   PetscTruth        iascii,isdraw;
654   PetscViewer       sviewer;
655   PetscViewerFormat format;
656 #if defined(PETSC_HAVE_PLAPACK)
657   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
658 #endif
659 
660   PetscFunctionBegin;
661   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
662   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
663   if (iascii) {
664     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
665     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
666     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
667       MatInfo info;
668       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
669       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
670                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
671       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
672       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
673 #if defined(PETSC_HAVE_PLAPACK)
674       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
675       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
676       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
677       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
678       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
679 #endif
680       PetscFunctionReturn(0);
681     } else if (format == PETSC_VIEWER_ASCII_INFO) {
682       PetscFunctionReturn(0);
683     }
684   } else if (isdraw) {
685     PetscDraw  draw;
686     PetscTruth isnull;
687 
688     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
689     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
690     if (isnull) PetscFunctionReturn(0);
691   }
692 
693   if (size == 1) {
694     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
695   } else {
696     /* assemble the entire matrix onto first processor. */
697     Mat         A;
698     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
699     PetscInt    *cols;
700     PetscScalar *vals;
701 
702     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
703     if (!rank) {
704       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
705     } else {
706       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
707     }
708     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
709     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
710     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
711     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
712 
713     /* Copy the matrix ... This isn't the most efficient means,
714        but it's quick for now */
715     A->insertmode = INSERT_VALUES;
716     row = mat->rmap.rstart; m = mdn->A->rmap.n;
717     for (i=0; i<m; i++) {
718       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
719       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
720       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
721       row++;
722     }
723 
724     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
725     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
726     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
727     if (!rank) {
728       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
729     }
730     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
731     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
732     ierr = MatDestroy(A);CHKERRQ(ierr);
733   }
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNCT__
738 #define __FUNCT__ "MatView_MPIDense"
739 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
740 {
741   PetscErrorCode ierr;
742   PetscTruth     iascii,isbinary,isdraw,issocket;
743 
744   PetscFunctionBegin;
745 
746   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
747   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
748   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
749   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
750 
751   if (iascii || issocket || isdraw) {
752     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
753   } else if (isbinary) {
754     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
755   } else {
756     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
757   }
758   PetscFunctionReturn(0);
759 }
760 
761 #undef __FUNCT__
762 #define __FUNCT__ "MatGetInfo_MPIDense"
763 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
764 {
765   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
766   Mat            mdn = mat->A;
767   PetscErrorCode ierr;
768   PetscReal      isend[5],irecv[5];
769 
770   PetscFunctionBegin;
771   info->rows_global    = (double)A->rmap.N;
772   info->columns_global = (double)A->cmap.N;
773   info->rows_local     = (double)A->rmap.n;
774   info->columns_local  = (double)A->cmap.N;
775   info->block_size     = 1.0;
776   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
777   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
778   isend[3] = info->memory;  isend[4] = info->mallocs;
779   if (flag == MAT_LOCAL) {
780     info->nz_used      = isend[0];
781     info->nz_allocated = isend[1];
782     info->nz_unneeded  = isend[2];
783     info->memory       = isend[3];
784     info->mallocs      = isend[4];
785   } else if (flag == MAT_GLOBAL_MAX) {
786     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
787     info->nz_used      = irecv[0];
788     info->nz_allocated = irecv[1];
789     info->nz_unneeded  = irecv[2];
790     info->memory       = irecv[3];
791     info->mallocs      = irecv[4];
792   } else if (flag == MAT_GLOBAL_SUM) {
793     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
794     info->nz_used      = irecv[0];
795     info->nz_allocated = irecv[1];
796     info->nz_unneeded  = irecv[2];
797     info->memory       = irecv[3];
798     info->mallocs      = irecv[4];
799   }
800   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
801   info->fill_ratio_needed = 0;
802   info->factor_mallocs    = 0;
803   PetscFunctionReturn(0);
804 }
805 
806 #undef __FUNCT__
807 #define __FUNCT__ "MatSetOption_MPIDense"
808 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
809 {
810   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   switch (op) {
815   case MAT_NEW_NONZERO_LOCATIONS:
816   case MAT_NEW_NONZERO_LOCATION_ERR:
817   case MAT_NEW_NONZERO_ALLOCATION_ERR:
818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
819     break;
820   case MAT_ROW_ORIENTED:
821     a->roworiented = flg;
822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
823     break;
824   case MAT_NEW_DIAGONALS:
825   case MAT_USE_HASH_TABLE:
826     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
827     break;
828   case MAT_IGNORE_OFF_PROC_ENTRIES:
829     a->donotstash = flg;
830     break;
831   case MAT_SYMMETRIC:
832   case MAT_STRUCTURALLY_SYMMETRIC:
833   case MAT_HERMITIAN:
834   case MAT_SYMMETRY_ETERNAL:
835   case MAT_IGNORE_LOWER_TRIANGULAR:
836     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
837     break;
838   default:
839     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 
845 #undef __FUNCT__
846 #define __FUNCT__ "MatDiagonalScale_MPIDense"
847 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
848 {
849   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
850   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
851   PetscScalar    *l,*r,x,*v;
852   PetscErrorCode ierr;
853   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
854 
855   PetscFunctionBegin;
856   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
857   if (ll) {
858     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
859     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
860     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
861     for (i=0; i<m; i++) {
862       x = l[i];
863       v = mat->v + i;
864       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
865     }
866     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
867     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
868   }
869   if (rr) {
870     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
871     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
872     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
875     for (i=0; i<n; i++) {
876       x = r[i];
877       v = mat->v + i*m;
878       for (j=0; j<m; j++) { (*v++) *= x;}
879     }
880     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
881     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
882   }
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatNorm_MPIDense"
888 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
889 {
890   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
891   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
892   PetscErrorCode ierr;
893   PetscInt       i,j;
894   PetscReal      sum = 0.0;
895   PetscScalar    *v = mat->v;
896 
897   PetscFunctionBegin;
898   if (mdn->size == 1) {
899     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
900   } else {
901     if (type == NORM_FROBENIUS) {
902       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
903 #if defined(PETSC_USE_COMPLEX)
904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
905 #else
906         sum += (*v)*(*v); v++;
907 #endif
908       }
909       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
910       *nrm = sqrt(*nrm);
911       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
912     } else if (type == NORM_1) {
913       PetscReal *tmp,*tmp2;
914       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
915       tmp2 = tmp + A->cmap.N;
916       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
917       *nrm = 0.0;
918       v = mat->v;
919       for (j=0; j<mdn->A->cmap.n; j++) {
920         for (i=0; i<mdn->A->rmap.n; i++) {
921           tmp[j] += PetscAbsScalar(*v);  v++;
922         }
923       }
924       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
925       for (j=0; j<A->cmap.N; j++) {
926         if (tmp2[j] > *nrm) *nrm = tmp2[j];
927       }
928       ierr = PetscFree(tmp);CHKERRQ(ierr);
929       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
930     } else if (type == NORM_INFINITY) { /* max row norm */
931       PetscReal ntemp;
932       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
933       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
934     } else {
935       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
936     }
937   }
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatTranspose_MPIDense"
943 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
944 {
945   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
946   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
947   Mat            B;
948   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
949   PetscErrorCode ierr;
950   PetscInt       j,i;
951   PetscScalar    *v;
952 
953   PetscFunctionBegin;
954   if (A == *matout && M != N) {
955     SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
956   }
957   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
958     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
959     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
960     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
961     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
962   } else {
963     B = *matout;
964   }
965 
966   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
967   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
968   for (i=0; i<m; i++) rwork[i] = rstart + i;
969   for (j=0; j<n; j++) {
970     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
971     v   += m;
972   }
973   ierr = PetscFree(rwork);CHKERRQ(ierr);
974   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
975   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976   if (*matout != A) {
977     *matout = B;
978   } else {
979     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 #include "petscblaslapack.h"
985 #undef __FUNCT__
986 #define __FUNCT__ "MatScale_MPIDense"
987 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
988 {
989   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
990   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
991   PetscScalar    oalpha = alpha;
992   PetscErrorCode ierr;
993   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
994 
995   PetscFunctionBegin;
996   BLASscal_(&nz,&oalpha,a->v,&one);
997   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1005 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1006 {
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1016 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1017 {
1018   PetscErrorCode ierr;
1019   PetscInt       m=A->rmap.n,n=B->cmap.n;
1020   Mat            Cmat;
1021 
1022   PetscFunctionBegin;
1023   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1024   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1025   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1026   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1027   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1028   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1030   *C = Cmat;
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #if defined(PETSC_HAVE_PLAPACK)
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatMPIDenseCreatePlapack"
1038 PetscErrorCode MatMPIDenseCreatePlapack(Mat A)
1039 {
1040   Mat_Plapack    *lu;
1041   PetscErrorCode ierr;
1042   PetscInt       M=A->rmap.N;
1043   MPI_Comm       comm=((PetscObject)A)->comm;
1044   PetscMPIInt    size;
1045 
1046   PetscFunctionBegin;
1047   lu = (Mat_Plapack*)(A->spptr);
1048 
1049   /* Set default Plapack parameters */
1050   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1051   lu->nb     = M/size;
1052   if (M - lu->nb*size) lu->nb++; /* round up so that nb*size >= M (no cyclic wrapping) */
1053 
1054   /* Set runtime options */
1055   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1056     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1057   PetscOptionsEnd();
1058 
1059   /* Create object distribution template */
1060   lu->templ = NULL;
1061   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1062 
1063   /* Set the datatype */
1064 #if defined(PETSC_USE_COMPLEX)
1065   lu->datatype = MPI_DOUBLE_COMPLEX;
1066 #else
1067   lu->datatype = MPI_DOUBLE;
1068 #endif
1069 
1070   ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1071 
1072   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1073   lu->CleanUpPlapack = PETSC_TRUE;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1079 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1080 {
1081   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1082   PetscErrorCode ierr;
1083   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
1084   PetscScalar    *array;
1085   PetscReal      one = 1.0;
1086 
1087   PetscFunctionBegin;
1088   /* Copy A into lu->A */
1089   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1090   ierr = PLA_API_begin();CHKERRQ(ierr);
1091   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1092   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1093   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1094   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1095   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1096   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1097   ierr = PLA_API_end();CHKERRQ(ierr);
1098   lu->rstart         = rstart;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatSolve_MPIDense"
1104 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1105 {
1106   MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
1107   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1108   PetscErrorCode ierr;
1109   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1110   PetscScalar    *array;
1111   PetscReal      one = 1.0;
1112   PetscMPIInt    size,r_rank,r_nproc,c_rank,c_nproc;
1113   PLA_Obj        v_pla = NULL;
1114   PetscScalar    *loc_buf;
1115   Vec            loc_x;
1116 
1117   PetscFunctionBegin;
1118   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1119 
1120   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1121   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1122   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1123 
1124   /* Copy b into v_pla */
1125   ierr = PLA_API_begin();CHKERRQ(ierr);
1126   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1127   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1128   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1129   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1130   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1131   ierr = PLA_API_end();CHKERRQ(ierr);
1132 
1133   if (A->factor == FACTOR_LU){
1134     /* Apply the permutations to the right hand sides */
1135     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1136 
1137     /* Solve L y = b, overwriting b with y */
1138     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1139 
1140     /* Solve U x = y (=b), overwriting b with x */
1141     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1142   } else { /* FACTOR_CHOLESKY */
1143     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1144     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1145   }
1146 
1147   /* Copy the PLAPACK solution into the PETSc vector x */
1148   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1149   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1150   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1151   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1152   if (!lu->pla_solved){
1153 
1154     ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
1155     ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
1156     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1157 
1158     /* Create the index sets and the scatter context for the VecScatter */
1159     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1160     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1161     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1162     idx_petsc = idx_pla + loc_m;
1163 
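    /* PLAPACK distributes the vector in blocks of lu->nb entries handed out round-robin
       to the r_nproc x c_nproc process mesh in the rank order r_rank*c_nproc + c_rank;
       idx_petsc records, for each locally stored entry, its global index in the PETSc
       ordering, while idx_pla walks the local buffer with stride loc_stride */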
1164     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1165     for (i=0; i<loc_m; i+=lu->nb){
1166       j = 0;
1167       while (j < lu->nb && i+j < loc_m){
1168         idx_petsc[i+j] = rstart + j; j++;
1169       }
1170       rstart += size*lu->nb;
1171     }
1172 
1173     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1174 
1175     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1176     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1177     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1178     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1179   }
1180   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1181   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182 
1183   /* Free data */
1184   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1185   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1186 
1187   lu->pla_solved = PETSC_TRUE;
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1193 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1194 {
1195   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1196   PetscErrorCode ierr;
1197   PetscInt       info_pla;
1198 
1199   PetscFunctionBegin;
1200   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1201 
1202   /* Copy A into F->lu->A */
1203   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1204 
1205   /* Factor P A = L U, overwriting the lower triangular portion of A with L and the upper with U */
1206   info_pla = PLA_LU(lu->A,lu->pivots);
1207   if (info_pla) SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1208 
1209   (*F)->assembled    = PETSC_TRUE;
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1215 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1216 {
1217   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1218   PetscErrorCode ierr;
1219   PetscInt       info_pla;
1220 
1221   PetscFunctionBegin;
1222 
1223   /* Copy A into F->lu->A */
1224   ierr = MatMPIDenseCopyToPlapack(A,*F);CHKERRQ(ierr);
1225 
1226   /* Factor A = L L^T (L L^H in the complex case), overwriting the lower triangle of A with L */
1227   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1228   if (info_pla) SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1229 
1230   (*F)->assembled = PETSC_TRUE;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1237 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1238 {
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   /* Create the factorization matrix */
1243   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1244   ierr = MatSetSizes(*F,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1245   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1246   ierr = MatMPIDenseCreatePlapack(*F);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 /* Note that the PETSc r and c permutations are ignored */
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1253 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1254 {
1255   PetscErrorCode ierr;
1256   PetscInt       M = A->rmap.N;
1257   Mat_Plapack    *lu;
1258 
1259   PetscFunctionBegin;
1260   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1261   lu = (Mat_Plapack*)(*F)->spptr;
1262   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1263   (*F)->factor = FACTOR_LU;
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 /* Note that the PETSc perm permutation is ignored */
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1270 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1271 {
1272   PetscErrorCode ierr;
1273   PetscTruth     issymmetric,set;
1274 
1275   PetscFunctionBegin;
1276   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1277   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1278   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1279   (*F)->factor = FACTOR_CHOLESKY;
1280   PetscFunctionReturn(0);
1281 }
1282 #endif
1283 
1284 /* -------------------------------------------------------------------*/
1285 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1286        MatGetRow_MPIDense,
1287        MatRestoreRow_MPIDense,
1288        MatMult_MPIDense,
1289 /* 4*/ MatMultAdd_MPIDense,
1290        MatMultTranspose_MPIDense,
1291        MatMultTransposeAdd_MPIDense,
1292 #if defined(PETSC_HAVE_PLAPACK)
1293        MatSolve_MPIDense,
1294 #else
1295        0,
1296 #endif
1297        0,
1298        0,
1299 /*10*/ 0,
1300        0,
1301        0,
1302        0,
1303        MatTranspose_MPIDense,
1304 /*15*/ MatGetInfo_MPIDense,
1305        MatEqual_MPIDense,
1306        MatGetDiagonal_MPIDense,
1307        MatDiagonalScale_MPIDense,
1308        MatNorm_MPIDense,
1309 /*20*/ MatAssemblyBegin_MPIDense,
1310        MatAssemblyEnd_MPIDense,
1311        0,
1312        MatSetOption_MPIDense,
1313        MatZeroEntries_MPIDense,
1314 /*25*/ MatZeroRows_MPIDense,
1315 #if defined(PETSC_HAVE_PLAPACK)
1316        MatLUFactorSymbolic_MPIDense,
1317        MatLUFactorNumeric_MPIDense,
1318        MatCholeskyFactorSymbolic_MPIDense,
1319        MatCholeskyFactorNumeric_MPIDense,
1320 #else
1321        0,
1322        0,
1323        0,
1324        0,
1325 #endif
1326 /*30*/ MatSetUpPreallocation_MPIDense,
1327        0,
1328        0,
1329        MatGetArray_MPIDense,
1330        MatRestoreArray_MPIDense,
1331 /*35*/ MatDuplicate_MPIDense,
1332        0,
1333        0,
1334        0,
1335        0,
1336 /*40*/ 0,
1337        MatGetSubMatrices_MPIDense,
1338        0,
1339        MatGetValues_MPIDense,
1340        0,
1341 /*45*/ 0,
1342        MatScale_MPIDense,
1343        0,
1344        0,
1345        0,
1346 /*50*/ 0,
1347        0,
1348        0,
1349        0,
1350        0,
1351 /*55*/ 0,
1352        0,
1353        0,
1354        0,
1355        0,
1356 /*60*/ MatGetSubMatrix_MPIDense,
1357        MatDestroy_MPIDense,
1358        MatView_MPIDense,
1359        0,
1360        0,
1361 /*65*/ 0,
1362        0,
1363        0,
1364        0,
1365        0,
1366 /*70*/ 0,
1367        0,
1368        0,
1369        0,
1370        0,
1371 /*75*/ 0,
1372        0,
1373        0,
1374        0,
1375        0,
1376 /*80*/ 0,
1377        0,
1378        0,
1379        0,
1380 /*84*/ MatLoad_MPIDense,
1381        0,
1382        0,
1383        0,
1384        0,
1385        0,
1386 /*90*/ 0,
1387        MatMatMultSymbolic_MPIDense_MPIDense,
1388        0,
1389        0,
1390        0,
1391 /*95*/ 0,
1392        0,
1393        0,
1394        0};
1395 
1396 EXTERN_C_BEGIN
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1399 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1400 {
1401   Mat_MPIDense   *a;
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   mat->preallocated = PETSC_TRUE;
1406   /* Note: for now, when data is non-null, we assume the user has correctly
1407      allocated the local dense storage space.  We should add error checking. */
1408 
1409   a    = (Mat_MPIDense*)mat->data;
1410   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1411   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1412   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1413   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1414   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 EXTERN_C_END
1418 
1419 EXTERN_C_BEGIN
1420 #if defined(PETSC_HAVE_PLAPACK)
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1423 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1424 {
1425   PetscFunctionBegin;
1426   /* Dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") works silently (since PLAPACK is on by default) */
1427   PetscFunctionReturn(0);
1428 }
1429 #endif
1430 EXTERN_C_END
1431 
1432 /*MC
1433    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1434 
1435    Options Database Keys:
1436 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1437 
1438   Level: beginner
1439 
1440   MATMPIDENSE matrices may be factored with parallel direct solvers (LU, Cholesky,
1441   and QR) from the external package PLAPACK, if PLAPACK is installed (run
1442   config/configure.py with the option --download-plapack).
1443 
1444 
1445   Options Database Keys:
1446 + -mat_plapack_nprows <n> - number of rows in processor partition
1447 . -mat_plapack_npcols <n> - number of columns in processor partition
1448 . -mat_plapack_nb <n> - block size of template vector
1449 . -mat_plapack_nb_alg <n> - algorithmic block size
1450 - -mat_plapack_ckerror <n> - error checking flag
1451 
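  Example (a minimal creation sketch; comm and the sizes m, n, M, N are placeholders;
  error checking omitted):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,PETSC_NULL);  /* PETSc allocates the local storage */
      /* ... set entries with MatSetValues() ... */
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve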
1452 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1453 M*/
1454 
1455 EXTERN_C_BEGIN
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatCreate_MPIDense"
1458 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1459 {
1460   Mat_MPIDense   *a;
1461   PetscErrorCode ierr;
1462 #if defined(PETSC_HAVE_PLAPACK)
1463   Mat_Plapack    *lu;
1464 #endif
1465 
1466   PetscFunctionBegin;
1467   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1468   mat->data         = (void*)a;
1469   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1470   mat->factor       = 0;
1471   mat->mapping      = 0;
1472 
1473   mat->insertmode = NOT_SET_VALUES;
1474   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1475   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1476 
1477   mat->rmap.bs = mat->cmap.bs = 1;
1478   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1479   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1480   a->nvec = mat->cmap.n;
1481 
1482   /* build cache for off-processor entries formed during assembly */
1483   a->donotstash = PETSC_FALSE;
1484   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1485 
1486   /* stuff used for matrix-vector multiply */
1487   a->lvec        = 0;
1488   a->Mvctx       = 0;
1489   a->roworiented = PETSC_TRUE;
1490 
1491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1492                                      "MatGetDiagonalBlock_MPIDense",
1493                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1495                                      "MatMPIDenseSetPreallocation_MPIDense",
1496                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1498                                      "MatMatMult_MPIAIJ_MPIDense",
1499                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1500   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1501                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1502                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1504                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1505                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1506 #if defined(PETSC_HAVE_PLAPACK)
1507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1508                                      "MatSetSolverType_MPIDense_PLAPACK",
1509                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1510 #endif
1511   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1512 
1513 #if defined(PETSC_HAVE_PLAPACK)
1514   ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
1515   lu->CleanUpPlapack = PETSC_FALSE;
1516   mat->spptr         = (void*)lu;
1517 #endif
1518   PetscFunctionReturn(0);
1519 }
1520 EXTERN_C_END
1521 
1522 /*MC
1523    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1524 
1525    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1526    and MATMPIDENSE otherwise.
1527 
1528    Options Database Keys:
1529 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1530 
1531   Level: beginner
1532 
1533 
1534 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1535 M*/
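
/*
   A minimal sketch (illustrative; A, comm, M, and N are assumed declared as in
   the examples above): because MATDENSE resolves to MATSEQDENSE on one process
   and MATMPIDENSE otherwise, the same code runs sequentially or in parallel
   unchanged.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
*/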
1536 
1537 EXTERN_C_BEGIN
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatCreate_Dense"
1540 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1541 {
1542   PetscErrorCode ierr;
1543   PetscMPIInt    size;
1544 
1545   PetscFunctionBegin;
1546   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1547   if (size == 1) {
1548     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1549   } else {
1550     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 EXTERN_C_END
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1558 /*@C
1559    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1560 
1561    Not Collective
1562 
1563    Input Parameters:
1564 +  A - the matrix
1565 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1566    to control all matrix memory allocation.
1567 
1568    Notes:
1569    The dense format is fully compatible with standard Fortran 77
1570    storage by columns.
1571 
1572    The data input variable is intended primarily for Fortran programmers
1573    who wish to allocate their own matrix memory space.  Most users should
1574    set data=PETSC_NULL.
1575 
1576    Level: intermediate
1577 
1578 .keywords: matrix,dense, parallel
1579 
1580 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1581 @*/
1582 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1583 {
1584   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1585 
1586   PetscFunctionBegin;
1587   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1588   if (f) {
1589     ierr = (*f)(mat,data);CHKERRQ(ierr);
1590   }
1591   PetscFunctionReturn(0);
1592 }
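
/*
   A minimal sketch of supplying user-allocated storage (the variable names m,
   N, and mydata are illustrative): the array must hold the local rows in
   column-major order, i.e. have length m*N with entry (i,j) at mydata[i + j*m],
   where m is the number of local rows and N the number of global columns.

     PetscScalar *mydata;
     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);

   Most users should instead pass data=PETSC_NULL and let PETSc allocate.
*/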
1593 
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatCreateMPIDense"
1596 /*@C
1597    MatCreateMPIDense - Creates a parallel matrix in dense format.
1598 
1599    Collective on MPI_Comm
1600 
1601    Input Parameters:
1602 +  comm - MPI communicator
1603 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1604 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1605 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1606 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1607 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1608    to control all matrix memory allocation.
1609 
1610    Output Parameter:
1611 .  A - the matrix
1612 
1613    Notes:
1614    The dense format is fully compatible with standard Fortran 77
1615    storage by columns.
1616 
1617    The data input variable is intended primarily for Fortran programmers
1618    who wish to allocate their own matrix memory space.  Most users should
1619    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1620 
1621    The user MUST specify either the local or global matrix dimensions
1622    (possibly both).
1623 
1624    Level: intermediate
1625 
1626 .keywords: matrix,dense, parallel
1627 
1628 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1629 @*/
1630 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1631 {
1632   PetscErrorCode ierr;
1633   PetscMPIInt    size;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1637   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1638   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1639   if (size > 1) {
1640     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1641     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1642   } else {
1643     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1644     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1645   }
1646   PetscFunctionReturn(0);
1647 }
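
/*
   A minimal usage sketch (sizes illustrative): create a 100 x 100 parallel
   dense matrix, letting PETSc choose the row decomposition and manage memory.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
*/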
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatDuplicate_MPIDense"
1651 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1652 {
1653   Mat            mat;
1654   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1655   PetscErrorCode ierr;
1656 
1657   PetscFunctionBegin;
1658   *newmat       = 0;
1659   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1660   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1661   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1662   a                 = (Mat_MPIDense*)mat->data;
1663   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1664   mat->factor       = A->factor;
1665   mat->assembled    = PETSC_TRUE;
1666   mat->preallocated = PETSC_TRUE;
1667 
1668   mat->rmap.rstart     = A->rmap.rstart;
1669   mat->rmap.rend       = A->rmap.rend;
1670   a->size              = oldmat->size;
1671   a->rank              = oldmat->rank;
1672   mat->insertmode      = NOT_SET_VALUES;
1673   a->nvec              = oldmat->nvec;
1674   a->donotstash        = oldmat->donotstash;
1675 
1676   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1677   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1678   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1679 
1680   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1681   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1682   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1683 
1684 #if defined(PETSC_HAVE_PLAPACK)
1685   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1686 #endif
1687   *newmat = mat;
1688   PetscFunctionReturn(0);
1689 }
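
/*
   A minimal sketch of how MatDuplicate_MPIDense() is reached (illustrative):
   callers use the generic interface and choose whether values are copied.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/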
1690 
1691 #include "petscsys.h"
1692 
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1695 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1696 {
1697   PetscErrorCode ierr;
1698   PetscMPIInt    rank,size;
1699   PetscInt       *rowners,i,m,nz,j;
1700   PetscScalar    *array,*vals,*vals_ptr;
1701   MPI_Status     status;
1702 
1703   PetscFunctionBegin;
1704   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1705   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1706 
1707   /* determine ownership of all rows */
1708   m          = M/size + ((M % size) > rank);
1709   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1710   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1711   rowners[0] = 0;
1712   for (i=2; i<=size; i++) {
1713     rowners[i] += rowners[i-1];
1714   }
1715 
1716   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1717   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1718   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1719   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1720   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1721 
1722   if (!rank) {
1723     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1724 
1725     /* read in my part of the matrix numerical values  */
1726     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1727 
1728     /* insert into the matrix by row (this is why we cannot read directly into array) */
1729     vals_ptr = vals;
1730     for (i=0; i<m; i++) {
1731       for (j=0; j<N; j++) {
1732         array[i + j*m] = *vals_ptr++;
1733       }
1734     }
1735 
1736     /* read in the other processors' parts and ship them out */
1737     for (i=1; i<size; i++) {
1738       nz   = (rowners[i+1] - rowners[i])*N;
1739       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1740       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1741     }
1742   } else {
1743     /* receive numeric values */
1744     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1745 
1746     /* receive message of values */
1747     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1748 
1749     /* insert into the matrix by row (this is why we cannot read directly into array) */
1750     vals_ptr = vals;
1751     for (i=0; i<m; i++) {
1752       for (j=0; j<N; j++) {
1753         array[i + j*m] = *vals_ptr++;
1754       }
1755     }
1756   }
1757   ierr = PetscFree(rowners);CHKERRQ(ierr);
1758   ierr = PetscFree(vals);CHKERRQ(ierr);
1759   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1760   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
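
/*
   A minimal worked example of the reordering above (values illustrative): for
   m = 2 local rows and N = 3 columns the file supplies the row-major sequence
   a00 a01 a02 a10 a11 a12, while the column-major local array must end up as
   a00 a10 a01 a11 a02 a12; hence array[i + j*m] = *vals_ptr++ with the row
   index i in the outer loop.
*/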
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatLoad_MPIDense"
1766 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1767 {
1768   Mat            A;
1769   PetscScalar    *vals,*svals;
1770   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1771   MPI_Status     status;
1772   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1773   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1774   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1775   PetscInt       i,nz,j,rstart,rend;
1776   int            fd;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1781   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1782   if (!rank) {
1783     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1784     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1785     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1786   }
1787 
1788   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1789   M = header[1]; N = header[2]; nz = header[3];
1790 
1791   /*
1792        Handle case where matrix is stored on disk as a dense matrix
1793   */
1794   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1795     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1796     PetscFunctionReturn(0);
1797   }
1798 
1799   /* determine ownership of all rows */
1800   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1801   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1802   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1803   rowners[0] = 0;
1804   for (i=2; i<=size; i++) {
1805     rowners[i] += rowners[i-1];
1806   }
1807   rstart = rowners[rank];
1808   rend   = rowners[rank+1];
1809 
1810   /* distribute row lengths to all processors */
1811   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1812   offlens = ourlens + (rend-rstart);
1813   if (!rank) {
1814     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1815     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1816     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1817     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1818     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1819     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1820   } else {
1821     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1822   }
1823 
1824   if (!rank) {
1825     /* calculate the number of nonzeros on each processor */
1826     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1827     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1828     for (i=0; i<size; i++) {
1829       for (j=rowners[i]; j< rowners[i+1]; j++) {
1830         procsnz[i] += rowlengths[j];
1831       }
1832     }
1833     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1834 
1835     /* determine max buffer needed and allocate it */
1836     maxnz = 0;
1837     for (i=0; i<size; i++) {
1838       maxnz = PetscMax(maxnz,procsnz[i]);
1839     }
1840     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1841 
1842     /* read in my part of the matrix column indices  */
1843     nz = procsnz[0];
1844     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1845     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1846 
1847     /* read in everyone else's part and ship it off */
1848     for (i=1; i<size; i++) {
1849       nz   = procsnz[i];
1850       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1851       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1852     }
1853     ierr = PetscFree(cols);CHKERRQ(ierr);
1854   } else {
1855     /* determine buffer space needed for message */
1856     nz = 0;
1857     for (i=0; i<m; i++) {
1858       nz += ourlens[i];
1859     }
1860     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1861 
1862     /* receive message of column indices */
1863     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1864     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1865     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1866   }
1867 
1868   /* loop over local rows, determining the number of off-diagonal entries */
1869   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1870   jj = 0;
1871   for (i=0; i<m; i++) {
1872     for (j=0; j<ourlens[i]; j++) {
1873       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1874       jj++;
1875     }
1876   }
1877 
1878   /* create our matrix */
1879   for (i=0; i<m; i++) {
1880     ourlens[i] -= offlens[i];
1881   }
1882   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1883   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1884   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1885   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1886   A = *newmat;
1887   for (i=0; i<m; i++) {
1888     ourlens[i] += offlens[i];
1889   }
1890 
1891   if (!rank) {
1892     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1893 
1894     /* read in my part of the matrix numerical values  */
1895     nz = procsnz[0];
1896     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1897 
1898     /* insert into matrix */
1899     jj      = rstart;
1900     smycols = mycols;
1901     svals   = vals;
1902     for (i=0; i<m; i++) {
1903       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1904       smycols += ourlens[i];
1905       svals   += ourlens[i];
1906       jj++;
1907     }
1908 
1909     /* read in the other processors' parts and ship them out */
1910     for (i=1; i<size; i++) {
1911       nz   = procsnz[i];
1912       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1913       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1914     }
1915     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1916   } else {
1917     /* receive numeric values */
1918     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1919 
1920     /* receive message of values */
1921     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1922     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1923     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1924 
1925     /* insert into matrix */
1926     jj      = rstart;
1927     smycols = mycols;
1928     svals   = vals;
1929     for (i=0; i<m; i++) {
1930       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1931       smycols += ourlens[i];
1932       svals   += ourlens[i];
1933       jj++;
1934     }
1935   }
1936   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1937   ierr = PetscFree(vals);CHKERRQ(ierr);
1938   ierr = PetscFree(mycols);CHKERRQ(ierr);
1939   ierr = PetscFree(rowners);CHKERRQ(ierr);
1940 
1941   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1942   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
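
/*
   A minimal sketch of driving this loader (the file name is illustrative):
   open a binary viewer and call the generic MatLoad(), which dispatches here
   when an mpidense matrix is requested.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIDENSE,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/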
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatEqual_MPIDense"
1948 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1949 {
1950   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1951   Mat            a,b;
1952   PetscTruth     flg;
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   a = matA->A;
1957   b = matB->A;
1958   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1959   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #if defined(PETSC_HAVE_PLAPACK)
1964 
1965 #undef __FUNCT__
1966 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
1967 /*@C
1968   PetscPLAPACKFinalizePackage - This function destroys everything in the PETSc interface to PLAPACK.
1969   Level: developer
1970 
1971 .keywords: Petsc, destroy, package, PLAPACK
1972 .seealso: PetscFinalize()
1973 @*/
1974 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
1975 {
1976   PetscErrorCode ierr;
1977 
1978   PetscFunctionBegin;
1979   ierr = PLA_Finalize();CHKERRQ(ierr);
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "PetscPLAPACKInitializePackage"
1985 /*@C
1986   PetscPLAPACKInitializePackage - This function initializes everything in the PETSc interface to PLAPACK. It is
1987   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
1988   when using static libraries.
1989 
1990   Input Parameter:
1991 . path - The dynamic library path, or PETSC_NULL
1992 
1993   Level: developer
1994 
1995 .keywords: Petsc, initialize, package, PLAPACK
1996 .seealso: PetscInitializePackage(), PetscInitialize()
1997 @*/
1998 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
1999 {
2000   MPI_Comm       comm = PETSC_COMM_WORLD;
2001   PetscMPIInt    size;
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   if (!PLA_Initialized(PETSC_NULL)) {
2006 
2007     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2008     Plapack_nprows = 1;
2009     Plapack_npcols = size;
2010 
2011     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2012       ierr = PetscOptionsInt("-plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2013       ierr = PetscOptionsInt("-plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2014       ierr = PetscOptionsInt("-plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2015       if (Plapack_ierror) {
2016         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE);CHKERRQ(ierr);
2017       } else {
2018         ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
2019       }
2020 
2021       Plapack_nb_alg = 0;
2022       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2023       if (Plapack_nb_alg) {
2024         ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2025       }
2026     PetscOptionsEnd();
2027 
2028     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2029     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2030     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
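
/*
   A minimal run-line sketch (process counts illustrative): select a 2 x 2
   PLAPACK process mesh and turn on its error checking, using the options
   registered above.

     mpiexec -n 4 ./myapp -plapack_nprows 2 -plapack_npcols 2 -plapack_ckerror 1
*/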
2034 
2035 
2036 #endif
2037