xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 04fea9ffb6098903af11c91df9842ba83d59ff70)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for MPI parallel dense matrices.
5 */
6 
7 
8 #include "src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 PetscInt Plapack_nprows,Plapack_npcols;
10 MPI_Comm Plapack_comm_2d;
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatDenseGetLocalMatrix"
14 /*@
15 
16       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
17               matrix that stores the local (on-process) entries; a sequential matrix returns itself.
18 
19     Input Parameter:
20 .      A - the MATMPIDENSE or MATSEQDENSE matrix
21 
22     Output Parameter:
23 .      B - the inner sequential (MATSEQDENSE) matrix
24 
25     Level: intermediate
26 
27 @*/
28 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
29 {
30   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
31   PetscErrorCode ierr;
32   PetscTruth     flg;
33 
34   PetscFunctionBegin;
35   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
36   if (flg) {
37     *B = mat->A;
38   } else {
39     *B = A;
40   }
41   PetscFunctionReturn(0);
42 }
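/*
   A minimal usage sketch (illustration only, not part of the library): reaching the
   on-process block of a MATMPIDENSE matrix A that was created elsewhere.  Aloc and
   nrm are hypothetical local variables.

     Mat       Aloc;
     PetscReal nrm;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatNorm(Aloc,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   (norm of the local block only)
*/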
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatGetRow_MPIDense"
46 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
47 {
48   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
49   PetscErrorCode ierr;
50   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
51 
52   PetscFunctionBegin;
53   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"Only local rows are supported");
54   lrow = row - rstart;
55   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatRestoreRow_MPIDense"
61 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
62 {
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
67   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
68   PetscFunctionReturn(0);
69 }
70 
71 EXTERN_C_BEGIN
72 #undef __FUNCT__
73 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
74 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
75 {
76   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
77   PetscErrorCode ierr;
78   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
79   PetscScalar    *array;
80   MPI_Comm       comm;
81 
82   PetscFunctionBegin;
83   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
84 
85   /* The reuse aspect is not implemented efficiently */
86   if (reuse == MAT_REUSE_MATRIX) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
87 
88   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
89   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
90   ierr = MatCreate(comm,B);CHKERRQ(ierr);
91   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
92   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
93   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
94   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
95   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
96   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
97 
98   *iscopy = PETSC_TRUE;
99   PetscFunctionReturn(0);
100 }
101 EXTERN_C_END
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "MatSetValues_MPIDense"
105 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
106 {
107   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
108   PetscErrorCode ierr;
109   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
110   PetscTruth     roworiented = A->roworiented;
111 
112   PetscFunctionBegin;
113   for (i=0; i<m; i++) {
114     if (idxm[i] < 0) continue;
115     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
116     if (idxm[i] >= rstart && idxm[i] < rend) {
117       row = idxm[i] - rstart;
118       if (roworiented) {
119         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
120       } else {
121         for (j=0; j<n; j++) {
122           if (idxn[j] < 0) continue;
123           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
124           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
125         }
126       }
127     } else {
128       if (!A->donotstash) {
129         if (roworiented) {
130           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);CHKERRQ(ierr);
131         } else {
132           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);CHKERRQ(ierr);
133         }
134       }
135     }
136   }
137   PetscFunctionReturn(0);
138 }
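/*
   The stash handling above supports the usual calling pattern (sketch; A is assumed
   to be a MATMPIDENSE matrix and i,j global indices): a process may set entries in
   rows it does not own; they are cached here and routed to the owner at assembly.

     ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/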
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatGetValues_MPIDense"
142 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
143 {
144   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
145   PetscErrorCode ierr;
146   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
147 
148   PetscFunctionBegin;
149   for (i=0; i<m; i++) {
150     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
151     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
152     if (idxm[i] >= rstart && idxm[i] < rend) {
153       row = idxm[i] - rstart;
154       for (j=0; j<n; j++) {
155         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
156         if (idxn[j] >= mat->cmap.N) {
157           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
158         }
159         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
160       }
161     } else {
162       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
163     }
164   }
165   PetscFunctionReturn(0);
166 }
167 
168 #undef __FUNCT__
169 #define __FUNCT__ "MatGetArray_MPIDense"
170 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
182 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
183 {
184   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
185   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
186   PetscErrorCode ierr;
187   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
188   PetscScalar    *av,*bv,*v = lmat->v;
189   Mat            newmat;
190 
191   PetscFunctionBegin;
192   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
193   ierr = ISGetIndices(iscol,&icol);CHKERRQ(ierr);
194   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
195   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
196 
197   /* No parallel redistribution is currently supported! We should really check each index set
198      to confirm that it is OK.  ... Currently the submatrix must have the same row partitioning
199      as the original matrix! */
200 
201   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
202   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
203 
204   /* Check submatrix call */
205   if (scall == MAT_REUSE_MATRIX) {
206     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
207     /* Really need to check the row and column sizes of the reused matrix! */
208     newmat = *B;
209   } else {
210     /* Create and fill new matrix */
211     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
212     ierr = MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);CHKERRQ(ierr);
213     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
214     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
215   }
216 
217   /* Now extract the data pointers and do the copy, column at a time */
218   newmatd = (Mat_MPIDense*)newmat->data;
219   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
220 
221   for (i=0; i<ncols; i++) {
222     av = v + nlrows*icol[i];
223     for (j=0; j<nrows; j++) {
224       *bv++ = av[irow[j] - rstart];
225     }
226   }
227 
228   /* Assemble the matrices so that the correct flags are set */
229   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
230   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231 
232   /* Free work space */
233   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
234   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
235   *B = newmat;
236   PetscFunctionReturn(0);
237 }
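/*
   A usage sketch under the restriction noted above (no redistribution): each process
   must request exactly its own rows.  Assumptions: A lives on PETSC_COMM_WORLD and
   ncols (the number of leading columns wanted) is set by the caller.

     IS       isrow,iscol;
     Mat      B;
     PetscInt rstart,rend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/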
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatRestoreArray_MPIDense"
241 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
242 {
243   PetscFunctionBegin;
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
249 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
250 {
251   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
252   MPI_Comm       comm = ((PetscObject)mat)->comm;
253   PetscErrorCode ierr;
254   PetscInt       nstash,reallocs;
255   InsertMode     addv;
256 
257   PetscFunctionBegin;
258   /* make sure all processors are either in INSERTMODE or ADDMODE */
259   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
260   if (addv == (ADD_VALUES|INSERT_VALUES)) {
261     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
262   }
263   mat->insertmode = addv; /* in case this processor had no cache */
264 
265   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
266   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
267   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
273 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
274 {
275   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
276   PetscErrorCode  ierr;
277   PetscInt        i,*row,*col,flg,j,rstart,ncols;
278   PetscMPIInt     n;
279   PetscScalar     *val;
280   InsertMode      addv=mat->insertmode;
281 
282   PetscFunctionBegin;
283   /*  wait on receives */
284   while (1) {
285     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
286     if (!flg) break;
287 
288     for (i=0; i<n;) {
289       /* Now identify the consecutive vals belonging to the same row */
290       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
291       if (j < n) ncols = j-i;
292       else       ncols = n-i;
293       /* Now assemble all these values with a single function call */
294       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
295       i = j;
296     }
297   }
298   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
299 
300   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
301   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
302 
303   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
304     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
305   }
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatZeroEntries_MPIDense"
311 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
312 {
313   PetscErrorCode ierr;
314   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
315 
316   PetscFunctionBegin;
317   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 /* The code does not handle the diagonal entries correctly unless the
322    matrix is square and the column and row ownerships are identical.
323    This is a BUG. The only way to fix it seems to be to access
324    the local matrix (l->A) directly and not go through the MatZeroRows()
325    routine.
326 */
327 #undef __FUNCT__
328 #define __FUNCT__ "MatZeroRows_MPIDense"
329 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
330 {
331   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
332   PetscErrorCode ierr;
333   PetscInt       i,*owners = A->rmap.range;
334   PetscInt       *nprocs,j,idx,nsends;
335   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
336   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
337   PetscInt       *lens,*lrows,*values;
338   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
339   MPI_Comm       comm = ((PetscObject)A)->comm;
340   MPI_Request    *send_waits,*recv_waits;
341   MPI_Status     recv_status,*send_status;
342   PetscTruth     found;
343 
344   PetscFunctionBegin;
345   /*  first count number of contributors to each processor */
346   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
347   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
348   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
349   for (i=0; i<N; i++) {
350     idx = rows[i];
351     found = PETSC_FALSE;
352     for (j=0; j<size; j++) {
353       if (idx >= owners[j] && idx < owners[j+1]) {
354         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
355       }
356     }
357     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
358   }
359   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
360 
361   /* inform other processors of number of messages and max length*/
362   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
363 
364   /* post receives:   */
365   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
366   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
367   for (i=0; i<nrecvs; i++) {
368     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
369   }
370 
371   /* do sends:
372       1) starts[i] gives the starting index in svalues for stuff going to
373          the ith processor
374   */
375   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
376   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
377   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
378   starts[0]  = 0;
379   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
380   for (i=0; i<N; i++) {
381     svalues[starts[owner[i]]++] = rows[i];
382   }
383 
384   starts[0] = 0;
385   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
386   count = 0;
387   for (i=0; i<size; i++) {
388     if (nprocs[2*i+1]) {
389       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
390     }
391   }
392   ierr = PetscFree(starts);CHKERRQ(ierr);
393 
394   base = owners[rank];
395 
396   /*  wait on receives */
397   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
398   source = lens + nrecvs;
399   count  = nrecvs; slen = 0;
400   while (count) {
401     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
402     /* unpack receives into our local space */
403     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
404     source[imdex]  = recv_status.MPI_SOURCE;
405     lens[imdex]    = n;
406     slen += n;
407     count--;
408   }
409   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
410 
411   /* move the data into the send scatter */
412   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
413   count = 0;
414   for (i=0; i<nrecvs; i++) {
415     values = rvalues + i*nmax;
416     for (j=0; j<lens[i]; j++) {
417       lrows[count++] = values[j] - base;
418     }
419   }
420   ierr = PetscFree(rvalues);CHKERRQ(ierr);
421   ierr = PetscFree(lens);CHKERRQ(ierr);
422   ierr = PetscFree(owner);CHKERRQ(ierr);
423   ierr = PetscFree(nprocs);CHKERRQ(ierr);
424 
425   /* actually zap the local rows */
426   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
427   ierr = PetscFree(lrows);CHKERRQ(ierr);
428 
429   /* wait on sends */
430   if (nsends) {
431     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
432     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
433     ierr = PetscFree(send_status);CHKERRQ(ierr);
434   }
435   ierr = PetscFree(send_waits);CHKERRQ(ierr);
436   ierr = PetscFree(svalues);CHKERRQ(ierr);
437 
438   PetscFunctionReturn(0);
439 }
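/*
   A usage sketch (rows hold GLOBAL row numbers; a process may list rows owned by
   another process, and the communication above routes them to the owner):

     PetscInt rows[2] = {0,5};

     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);   (zeroed rows receive 1.0 on the diagonal)
*/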
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatMult_MPIDense"
443 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
444 {
445   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
446   PetscErrorCode ierr;
447 
448   PetscFunctionBegin;
449   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
450   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
451   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatMultAdd_MPIDense"
457 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
458 {
459   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
460   PetscErrorCode ierr;
461 
462   PetscFunctionBegin;
463   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
464   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
465   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatMultTranspose_MPIDense"
471 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
472 {
473   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
474   PetscErrorCode ierr;
475   PetscScalar    zero = 0.0;
476 
477   PetscFunctionBegin;
478   ierr = VecSet(yy,zero);CHKERRQ(ierr);
479   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
480   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
481   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
487 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
488 {
489   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
490   PetscErrorCode ierr;
491 
492   PetscFunctionBegin;
493   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
494   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
495   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
496   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatGetDiagonal_MPIDense"
502 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
503 {
504   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
505   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
506   PetscErrorCode ierr;
507   PetscInt       len,i,n,m = A->rmap.n,radd;
508   PetscScalar    *x,zero = 0.0;
509 
510   PetscFunctionBegin;
511   ierr = VecSet(v,zero);CHKERRQ(ierr);
512   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
513   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
514   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
515   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
516   radd = A->rmap.rstart*m;
517   for (i=0; i<len; i++) {
518     x[i] = aloc->v[radd + i*m + i];
519   }
520   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatDestroy_MPIDense"
526 PetscErrorCode MatDestroy_MPIDense(Mat mat)
527 {
528   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
529   PetscErrorCode ierr;
530 #if defined(PETSC_HAVE_PLAPACK)
531   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
532 #endif
533 
534   PetscFunctionBegin;
535 
536 #if defined(PETSC_USE_LOG)
537   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
538 #endif
539   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
540   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
541   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
542   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
543 #if defined(PETSC_HAVE_PLAPACK)
544   if (lu->CleanUpPlapack) {
545     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
546     ierr = PLA_Obj_free(&lu->pivots);CHKERRQ(ierr);
547     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
548 
549 
550     ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
551     ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
552     ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
553   }
554 #endif
555 
556   ierr = PetscFree(mdn);CHKERRQ(ierr);
557   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
558   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
559   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
560   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
561   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
562   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_MPIDense_Binary"
568 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
569 {
570   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
571   PetscErrorCode    ierr;
572   PetscViewerFormat format;
573   int               fd;
574   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
575   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
576   PetscScalar       *work,*v,*vv;
577   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
578   MPI_Status        status;
579 
580   PetscFunctionBegin;
581   if (mdn->size == 1) {
582     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
583   } else {
584     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
585     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
586     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
587 
588     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
589     if (format == PETSC_VIEWER_BINARY_NATIVE) {
590 
591       if (!rank) {
592 	/* store the matrix as a dense matrix */
593 	header[0] = MAT_FILE_COOKIE;
594 	header[1] = mat->rmap.N;
595 	header[2] = N;
596 	header[3] = MATRIX_BINARY_FORMAT_DENSE;
597 	ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
598 
599 	/* get largest work array needed for transposing array */
600         mmax = mat->rmap.n;
601         for (i=1; i<size; i++) {
602           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
603         }
604 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
605 
606 	/* write out local array, by rows */
607         m    = mat->rmap.n;
608 	v    = a->v;
609         for (j=0; j<N; j++) {
610           for (i=0; i<m; i++) {
611 	    work[j + i*N] = *v++;
612 	  }
613 	}
614 	ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
615         /* get largest work array to receive messages from other processes, excludes process zero */
616         mmax = 0;
617         for (i=1; i<size; i++) {
618           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
619         }
620 	ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
621         v = vv;
622         for (k=1; k<size; k++) {
623           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
624           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
625 
626           for (j=0; j<N; j++) {
627             for (i=0; i<m; i++) {
628               work[j + i*N] = *v++;
629 	    }
630 	  }
631 	  ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
632         }
633         ierr = PetscFree(work);CHKERRQ(ierr);
634         ierr = PetscFree(vv);CHKERRQ(ierr);
635       } else {
636         ierr = MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
637       }
638     }
639   }
640   PetscFunctionReturn(0);
641 }
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
645 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
646 {
647   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
648   PetscErrorCode    ierr;
649   PetscMPIInt       size = mdn->size,rank = mdn->rank;
650   PetscViewerType   vtype;
651   PetscTruth        iascii,isdraw;
652   PetscViewer       sviewer;
653   PetscViewerFormat format;
654 #if defined(PETSC_HAVE_PLAPACK)
655   Mat_Plapack       *lu=(Mat_Plapack*)(mat->spptr);
656 #endif
657 
658   PetscFunctionBegin;
659   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
660   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
661   if (iascii) {
662     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
663     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
664     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
665       MatInfo info;
666       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
667       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
668                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
669       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
670       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
671 #if defined(PETSC_HAVE_PLAPACK)
672       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
673       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
674       ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
675       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",lu->ierror);CHKERRQ(ierr);
676       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",lu->nb_alg);CHKERRQ(ierr);
677 #endif
678       PetscFunctionReturn(0);
679     } else if (format == PETSC_VIEWER_ASCII_INFO) {
680       PetscFunctionReturn(0);
681     }
682   } else if (isdraw) {
683     PetscDraw  draw;
684     PetscTruth isnull;
685 
686     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
687     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
688     if (isnull) PetscFunctionReturn(0);
689   }
690 
691   if (size == 1) {
692     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
693   } else {
694     /* assemble the entire matrix onto first processor. */
695     Mat         A;
696     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
697     PetscInt    *cols;
698     PetscScalar *vals;
699 
700     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
701     if (!rank) {
702       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
703     } else {
704       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
705     }
706     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)mat)->type_name here is probably acceptable. */
707     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
708     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
709     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
710 
711     /* Copy the matrix ... This isn't the most efficient means,
712        but it's quick for now */
713     A->insertmode = INSERT_VALUES;
714     row = mat->rmap.rstart; m = mdn->A->rmap.n;
715     for (i=0; i<m; i++) {
716       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
717       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
718       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
719       row++;
720     }
721 
722     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
723     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
724     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
727     }
728     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
729     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
730     ierr = MatDestroy(A);CHKERRQ(ierr);
731   }
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatView_MPIDense"
737 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
738 {
739   PetscErrorCode ierr;
740   PetscTruth     iascii,isbinary,isdraw,issocket;
741 
742   PetscFunctionBegin;
743 
744   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
745   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
746   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
747   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
748 
749   if (iascii || issocket || isdraw) {
750     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
751   } else if (isbinary) {
752     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
753   } else {
754     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
755   }
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatGetInfo_MPIDense"
761 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
762 {
763   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
764   Mat            mdn = mat->A;
765   PetscErrorCode ierr;
766   PetscReal      isend[5],irecv[5];
767 
768   PetscFunctionBegin;
769   info->rows_global    = (double)A->rmap.N;
770   info->columns_global = (double)A->cmap.N;
771   info->rows_local     = (double)A->rmap.n;
772   info->columns_local  = (double)A->cmap.N;
773   info->block_size     = 1.0;
774   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
775   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
776   isend[3] = info->memory;  isend[4] = info->mallocs;
777   if (flag == MAT_LOCAL) {
778     info->nz_used      = isend[0];
779     info->nz_allocated = isend[1];
780     info->nz_unneeded  = isend[2];
781     info->memory       = isend[3];
782     info->mallocs      = isend[4];
783   } else if (flag == MAT_GLOBAL_MAX) {
784     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
785     info->nz_used      = irecv[0];
786     info->nz_allocated = irecv[1];
787     info->nz_unneeded  = irecv[2];
788     info->memory       = irecv[3];
789     info->mallocs      = irecv[4];
790   } else if (flag == MAT_GLOBAL_SUM) {
791     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
792     info->nz_used      = irecv[0];
793     info->nz_allocated = irecv[1];
794     info->nz_unneeded  = irecv[2];
795     info->memory       = irecv[3];
796     info->mallocs      = irecv[4];
797   }
798   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
799   info->fill_ratio_needed = 0;
800   info->factor_mallocs    = 0;
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatSetOption_MPIDense"
806 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
807 {
808   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
809   PetscErrorCode ierr;
810 
811   PetscFunctionBegin;
812   switch (op) {
813   case MAT_NEW_NONZERO_LOCATIONS:
814   case MAT_NEW_NONZERO_LOCATION_ERR:
815   case MAT_NEW_NONZERO_ALLOCATION_ERR:
816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
817     break;
818   case MAT_ROW_ORIENTED:
819     a->roworiented = flg;
820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
821     break;
822   case MAT_NEW_DIAGONALS:
823   case MAT_USE_HASH_TABLE:
824     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
825     break;
826   case MAT_IGNORE_OFF_PROC_ENTRIES:
827     a->donotstash = flg;
828     break;
829   case MAT_SYMMETRIC:
830   case MAT_STRUCTURALLY_SYMMETRIC:
831   case MAT_HERMITIAN:
832   case MAT_SYMMETRY_ETERNAL:
833   case MAT_IGNORE_LOWER_TRIANGULAR:
834     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
835     break;
836   default:
837     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
838   }
839   PetscFunctionReturn(0);
840 }
841 
842 
843 #undef __FUNCT__
844 #define __FUNCT__ "MatDiagonalScale_MPIDense"
845 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
846 {
847   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
848   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
849   PetscScalar    *l,*r,x,*v;
850   PetscErrorCode ierr;
851   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
852 
853   PetscFunctionBegin;
854   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
855   if (ll) {
856     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
857     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D.", s2a, s2);
858     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
859     for (i=0; i<m; i++) {
860       x = l[i];
861       v = mat->v + i;
862       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
863     }
864     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
865     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
866   }
867   if (rr) {
868     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
869     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D.", s3a, s3);
870     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
873     for (i=0; i<n; i++) {
874       x = r[i];
875       v = mat->v + i*m;
876       for (j=0; j<m; j++) { (*v++) *= x;}
877     }
878     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
879     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
880   }
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatNorm_MPIDense"
886 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
887 {
888   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
889   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
890   PetscErrorCode ierr;
891   PetscInt       i,j;
892   PetscReal      sum = 0.0;
893   PetscScalar    *v = mat->v;
894 
895   PetscFunctionBegin;
896   if (mdn->size == 1) {
897     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
898   } else {
899     if (type == NORM_FROBENIUS) {
900       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
901 #if defined(PETSC_USE_COMPLEX)
902         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
903 #else
904         sum += (*v)*(*v); v++;
905 #endif
906       }
907       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
908       *nrm = sqrt(*nrm);
909       ierr = PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);CHKERRQ(ierr);
910     } else if (type == NORM_1) {
911       PetscReal *tmp,*tmp2;
912       ierr = PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
913       tmp2 = tmp + A->cmap.N;
914       ierr = PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
915       *nrm = 0.0;
916       v = mat->v;
917       for (j=0; j<mdn->A->cmap.n; j++) {
918         for (i=0; i<mdn->A->rmap.n; i++) {
919           tmp[j] += PetscAbsScalar(*v);  v++;
920         }
921       }
922       ierr = MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
923       for (j=0; j<A->cmap.N; j++) {
924         if (tmp2[j] > *nrm) *nrm = tmp2[j];
925       }
926       ierr = PetscFree(tmp);CHKERRQ(ierr);
927       ierr = PetscLogFlops(A->cmap.n*A->rmap.n);CHKERRQ(ierr);
928     } else if (type == NORM_INFINITY) { /* max row norm */
929       PetscReal ntemp;
930       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
931       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
932     } else {
933       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
934     }
935   }
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatTranspose_MPIDense"
941 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
942 {
943   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
944   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
945   Mat            B;
946   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
947   PetscErrorCode ierr;
948   PetscInt       j,i;
949   PetscScalar    *v;
950 
951   PetscFunctionBegin;
952   if (A == *matout && M != N) {
953     SETERRQ(PETSC_ERR_SUP,"In-place transpose is supported only for square matrices");
954   }
955   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
956     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
957     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);CHKERRQ(ierr);
958     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
959     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
960   } else {
961     B = *matout;
962   }
963 
964   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
965   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
966   for (i=0; i<m; i++) rwork[i] = rstart + i;
967   for (j=0; j<n; j++) {
968     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
969     v   += m;
970   }
971   ierr = PetscFree(rwork);CHKERRQ(ierr);
972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
974   if (*matout != A) {
975     *matout = B;
976   } else {
977     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
978   }
979   PetscFunctionReturn(0);
980 }
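/*
   A usage sketch for the out-of-place case handled above (the in-place case,
   *matout == A, additionally requires a square matrix):

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/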
981 
982 #include "petscblaslapack.h"
983 #undef __FUNCT__
984 #define __FUNCT__ "MatScale_MPIDense"
985 PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
986 {
987   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
988   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
989   PetscScalar    oalpha = alpha;
990   PetscErrorCode ierr;
991   PetscBLASInt   one = 1,nz = PetscBLASIntCast(inA->rmap.n*inA->cmap.N);
992 
993   PetscFunctionBegin;
994   BLASscal_(&nz,&oalpha,a->v,&one);
995   ierr = PetscLogFlops(nz);CHKERRQ(ierr);
996   PetscFunctionReturn(0);
997 }
998 
999 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1003 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1004 {
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1014 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1015 {
1016   PetscErrorCode ierr;
1017   PetscInt       m=A->rmap.n,n=B->cmap.n;
1018   Mat            Cmat;
1019 
1020   PetscFunctionBegin;
1021   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1022   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1023   ierr = MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);CHKERRQ(ierr);
1024   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1025   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
1026   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1027   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1028   *C = Cmat;
1029   PetscFunctionReturn(0);
1030 }
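/*
   A caller-level sketch (A and B assumed to be dimensionally compatible MATMPIDENSE
   matrices): MatMatMult() drives the symbolic phase above together with the numeric phase.

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/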
1031 
1032 #if defined(PETSC_HAVE_PLAPACK)
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatSolve_MPIDense"
1035 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1036 {
1037   MPI_Comm       comm = ((PetscObject)A)->comm,dummy_comm;
1038   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1039   PetscErrorCode ierr;
1040   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1041   PetscScalar    *array;
1042   PetscReal      one = 1.0;
1043   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1044   PLA_Obj        v_pla = NULL;
1045   PetscScalar    *loc_buf;
1046   Vec            loc_x;
1047 
1048   PetscFunctionBegin;
1049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1050   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1051 
1052   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1053   ierr = PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);CHKERRQ(ierr);
1054   ierr = PLA_Obj_set_to_zero(v_pla);CHKERRQ(ierr);
1055 
1056   /* Copy b into rhs_pla */
1057   ierr = PLA_API_begin();CHKERRQ(ierr);
1058   ierr = PLA_Obj_API_open(v_pla);CHKERRQ(ierr);
1059   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1060   ierr = PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);CHKERRQ(ierr);
1061   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1062   ierr = PLA_Obj_API_close(v_pla);CHKERRQ(ierr);
1063   ierr = PLA_API_end();CHKERRQ(ierr);
1064 
1065   if (A->factor == FACTOR_LU){
1066     /* Apply the permutations to the right hand sides */
1067     ierr = PLA_Apply_pivots_to_rows (v_pla,lu->pivots);CHKERRQ(ierr);
1068 
1069     /* Solve L y = b, overwriting b with y */
1070     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1071 
1072     /* Solve U x = y (=b), overwriting b with x */
1073     ierr = PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );CHKERRQ(ierr);
1074   } else { /* FACTOR_CHOLESKY */
1075     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1076     ierr = PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),PLA_NONUNIT_DIAG,lu->A,v_pla);CHKERRQ(ierr);
1077   }
1078 
1079   /* Copy PLAPACK x into Petsc vector x  */
1080   ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1081   ierr = PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);CHKERRQ(ierr);
1082   ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1083   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1084   if (!lu->pla_solved){
1085 
1086     ierr = PLA_Temp_comm_row_info(lu->templ,&dummy_comm,&r_rank,&r_nproc);CHKERRQ(ierr);
1087     ierr = PLA_Temp_comm_col_info(lu->templ,&dummy_comm,&c_rank,&c_nproc);CHKERRQ(ierr);
1088     /* printf(" [%d] rank: %d %d, nproc: %d %d\n",rank,r_rank,c_rank,r_nproc,c_nproc); */
1089 
1090     /* Create index sets and the scatter context for the VecScatter */
1091     ierr = PLA_Obj_local_length(v_pla, &loc_m);CHKERRQ(ierr);
1092     ierr = PLA_Obj_local_stride(v_pla, &loc_stride);CHKERRQ(ierr);
1093     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1094     idx_petsc = idx_pla + loc_m;
1095 
1096     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1097     for (i=0; i<loc_m; i+=lu->nb){
1098       j = 0;
1099       while (j < lu->nb && i+j < loc_m){
1100         idx_petsc[i+j] = rstart + j; j++;
1101       }
1102       rstart += size*lu->nb;
1103     }
1104 
1105     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1106 
1107     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1108     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1109     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1110     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1111   }
1112   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114 
1115   /* Free data */
1116   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1117   ierr = PLA_Obj_free(&v_pla);CHKERRQ(ierr);
1118 
1119   lu->pla_solved = PETSC_TRUE;
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1125 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1126 {
1127   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1128   PetscErrorCode ierr;
1129   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
1130   PetscInt       info_pla=0;
1131   PetscScalar    *array,one = 1.0;
1132 
1133   PetscFunctionBegin;
1134   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1135 
1136   /* Copy A into lu->A */
1137   ierr = PLA_API_begin();CHKERRQ(ierr);
1138   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1139   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1140   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1141   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1142   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1143   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1144   ierr = PLA_API_end();CHKERRQ(ierr);
1145 
1146   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1147   info_pla = PLA_LU(lu->A,lu->pivots);
1148   if (info_pla != 0)
1149     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1150 
1151   lu->CleanUpPlapack = PETSC_TRUE;
1152   lu->rstart         = rstart;
1153 
1154   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1160 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat A,MatFactorInfo *info,Mat *F)
1161 {
1162   Mat_Plapack    *lu = (Mat_Plapack*)(*F)->spptr;
1163   PetscErrorCode ierr;
1164   PetscInt       M=A->rmap.N,m=A->rmap.n,rstart,rend;
1165   PetscInt       info_pla=0;
1166   PetscScalar    *array,one = 1.0;
1167 
1168   PetscFunctionBegin;
1169 
1170   /* Copy A into lu->A */
1171   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1172   ierr = PLA_API_begin();CHKERRQ(ierr);
1173   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1175   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1176   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1177   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1178   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1179   ierr = PLA_API_end();CHKERRQ(ierr);
1180 
1181   /* Factor P A -> Chol */
1182   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1183   if (info_pla != 0)
1184     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1185 
1186   lu->CleanUpPlapack = PETSC_TRUE;
1187   lu->rstart         = rstart;
1188 
1189   (*F)->assembled    = PETSC_TRUE;  /* required by -ksp_view */
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatFactorSymbolic_MPIDense_Private"
1195 PetscErrorCode MatFactorSymbolic_MPIDense_Private(Mat A,MatFactorInfo *info,Mat *F)
1196 {
1197   Mat            B;
1198   Mat_Plapack    *lu;
1199   PetscErrorCode ierr;
1200   PetscInt       M=A->rmap.N,N=A->cmap.N;
1201   MPI_Comm       comm=((PetscObject)A)->comm;
1202   PetscMPIInt    size;
1203   PetscInt       ierror;
1204 
1205   PetscFunctionBegin;
1206   /* Create the factorization matrix */
1207   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1208   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
1209   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1210 
1211   lu = (Mat_Plapack*)(B->spptr);
1212 
1213   /* Set default Plapack parameters */
1214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1215   ierror = 0;
1216   lu->nb     = M/size;
1217   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1218 
1219   /* Set runtime options */
1220   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1221   ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
1222   ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
1223 
1224   ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1225   ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",ierror,&ierror,PETSC_NULL);CHKERRQ(ierr);
1226   if (ierror){
1227     ierr = PLA_Set_error_checking(ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
1228   } else {
1229     ierr = PLA_Set_error_checking(ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
1230   }
1231   lu->ierror = ierror;
1232 
1233   lu->nb_alg = 0;
1234   ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",lu->nb_alg,&lu->nb_alg,PETSC_NULL);CHKERRQ(ierr);
1235   if (lu->nb_alg){
1236     ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
1237   }
1238   PetscOptionsEnd();
1239 
1240 
1241   /* Create object distribution template */
1242   lu->templ = NULL;
1243   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1244 
1245   /* Use suggested nb_alg if it is not provided by user */
1246   if (lu->nb_alg == 0){
1247     ierr = PLA_Environ_nb_alg(PLA_OP_PAN_PAN,lu->templ,&lu->nb_alg);CHKERRQ(ierr);
1248     ierr = pla_Environ_set_nb_alg(PLA_OP_ALL_ALG,lu->nb_alg);CHKERRQ(ierr);
1249   }
1250 
1251   /* Set the datatype */
1252 #if defined(PETSC_USE_COMPLEX)
1253   lu->datatype = MPI_DOUBLE_COMPLEX;
1254 #else
1255   lu->datatype = MPI_DOUBLE;
1256 #endif
1257 
1258   ierr = PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1259 
1260   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1261   lu->CleanUpPlapack = PETSC_TRUE;
1262   *F                 = B;
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 /* Note the Petsc r and c permutations are ignored */
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1269 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
1270 {
1271   PetscErrorCode ierr;
1272   PetscInt       M = A->rmap.N;
1273   Mat_Plapack    *lu;
1274 
1275   PetscFunctionBegin;
1276   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1277   lu = (Mat_Plapack*)(*F)->spptr;
1278   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1279   (*F)->factor = FACTOR_LU;
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 /* Note the Petsc perm permutation is ignored */
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1286 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *F)
1287 {
1288   PetscErrorCode ierr;
1289   PetscTruth     issymmetric,set;
1290 
1291   PetscFunctionBegin;
1292   ierr = MatIsSymmetricKnown(A,&set,&issymmetric); CHKERRQ(ierr);
1293   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1294   ierr = MatFactorSymbolic_MPIDense_Private(A,info,F);CHKERRQ(ierr);
1295   (*F)->factor = FACTOR_CHOLESKY;
1296   PetscFunctionReturn(0);
1297 }
1298 #endif
1299 
1300 /* -------------------------------------------------------------------*/
1301 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1302        MatGetRow_MPIDense,
1303        MatRestoreRow_MPIDense,
1304        MatMult_MPIDense,
1305 /* 4*/ MatMultAdd_MPIDense,
1306        MatMultTranspose_MPIDense,
1307        MatMultTransposeAdd_MPIDense,
1308 #if defined(PETSC_HAVE_PLAPACK)
1309        MatSolve_MPIDense,
1310 #else
1311        0,
1312 #endif
1313        0,
1314        0,
1315 /*10*/ 0,
1316        0,
1317        0,
1318        0,
1319        MatTranspose_MPIDense,
1320 /*15*/ MatGetInfo_MPIDense,
1321        MatEqual_MPIDense,
1322        MatGetDiagonal_MPIDense,
1323        MatDiagonalScale_MPIDense,
1324        MatNorm_MPIDense,
1325 /*20*/ MatAssemblyBegin_MPIDense,
1326        MatAssemblyEnd_MPIDense,
1327        0,
1328        MatSetOption_MPIDense,
1329        MatZeroEntries_MPIDense,
1330 /*25*/ MatZeroRows_MPIDense,
1331 #if defined(PETSC_HAVE_PLAPACK)
1332        MatLUFactorSymbolic_MPIDense,
1333        MatLUFactorNumeric_MPIDense,
1334        MatCholeskyFactorSymbolic_MPIDense,
1335        MatCholeskyFactorNumeric_MPIDense,
1336 #else
1337        0,
1338        0,
1339        0,
1340        0,
1341 #endif
1342 /*30*/ MatSetUpPreallocation_MPIDense,
1343        0,
1344        0,
1345        MatGetArray_MPIDense,
1346        MatRestoreArray_MPIDense,
1347 /*35*/ MatDuplicate_MPIDense,
1348        0,
1349        0,
1350        0,
1351        0,
1352 /*40*/ 0,
1353        MatGetSubMatrices_MPIDense,
1354        0,
1355        MatGetValues_MPIDense,
1356        0,
1357 /*45*/ 0,
1358        MatScale_MPIDense,
1359        0,
1360        0,
1361        0,
1362 /*50*/ 0,
1363        0,
1364        0,
1365        0,
1366        0,
1367 /*55*/ 0,
1368        0,
1369        0,
1370        0,
1371        0,
1372 /*60*/ MatGetSubMatrix_MPIDense,
1373        MatDestroy_MPIDense,
1374        MatView_MPIDense,
1375        0,
1376        0,
1377 /*65*/ 0,
1378        0,
1379        0,
1380        0,
1381        0,
1382 /*70*/ 0,
1383        0,
1384        0,
1385        0,
1386        0,
1387 /*75*/ 0,
1388        0,
1389        0,
1390        0,
1391        0,
1392 /*80*/ 0,
1393        0,
1394        0,
1395        0,
1396 /*84*/ MatLoad_MPIDense,
1397        0,
1398        0,
1399        0,
1400        0,
1401        0,
1402 /*90*/ 0,
1403        MatMatMultSymbolic_MPIDense_MPIDense,
1404        0,
1405        0,
1406        0,
1407 /*95*/ 0,
1408        0,
1409        0,
1410        0};
1411 
1412 EXTERN_C_BEGIN
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1415 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1416 {
1417   Mat_MPIDense   *a;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   mat->preallocated = PETSC_TRUE;
1422   /* Note:  For now, when a data pointer is provided, this assumes the user has correctly
1423    allocated the local dense storage space.  We should add error checking. */
1424 
1425   a    = (Mat_MPIDense*)mat->data;
1426   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1427   ierr = MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);CHKERRQ(ierr);
1428   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1429   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1430   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 EXTERN_C_END
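/*
   A user-level preallocation sketch (data is assumed to point to a column-major array
   of (local rows) x (global columns) PetscScalar entries allocated by the caller, or
   PETSC_NULL to have PETSc allocate the storage):

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/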
1434 
1435 EXTERN_C_BEGIN
1436 #if defined(PETSC_HAVE_PLAPACK)
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatSetSolverType_MPIDense_PLAPACK"
1439 PetscErrorCode PETSCMAT_DLLEXPORT MatSetSolverType_MPIDense_PLAPACK(Mat mat,const char *type)
1440 {
1441   PetscFunctionBegin;
1442   /* Dummy function so that -mat_solver_type plapack or MatSetSolverType(mat,"plapack") silently works (PLAPACK is used by default when installed) */
1443   PetscFunctionReturn(0);
1444 }
1445 #endif
1446 EXTERN_C_END
1447 
1448 /*MC
1449    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1450 
1451    Options Database Keys:
1452 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1453 
1454   Level: beginner
1455 
1456   MATMPIDENSE matrices may use direct solvers (LU, Cholesky, and QR)
1457   for parallel dense matrices via the external package PLAPACK, if PLAPACK is installed
1458   (run config/configure.py with the option --download-plapack)
1459 
1460 
1461   Options Database Keys:
1462 + -mat_plapack_nprows <n> - number of rows in processor partition
1463 . -mat_plapack_npcols <n> - number of columns in processor partition
1464 . -mat_plapack_nb <n> - block size of template vector
1465 . -mat_plapack_nb_alg <n> - algorithmic block size
1466 - -mat_plapack_ckerror <n> - error checking flag
1467 
1468 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE
1469 M*/
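
A minimal usage sketch (editorial addition, not part of the source): it assumes a standard PETSc program that has already called PetscInitialize(), and the sizes and values are illustrative. It shows the usual create/preallocate/assemble sequence for this type; calling MatSetFromOptions() and passing -mat_type mpidense would select the same class at runtime.

  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,M = 100,N = 100;
  PetscScalar    v;

  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);   /* let PETSc allocate the local storage */
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      v    = 1.0/(i+j+1);
      ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
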
1470 
1471 EXTERN_C_BEGIN
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatCreate_MPIDense"
1474 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1475 {
1476   Mat_MPIDense   *a;
1477   PetscErrorCode ierr;
1478 #if defined(PETSC_HAVE_PLAPACK)
1479   Mat_Plapack    *lu;
1480 #endif
1481 
1482   PetscFunctionBegin;
1483   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1484   mat->data         = (void*)a;
1485   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1486   mat->factor       = 0;
1487   mat->mapping      = 0;
1488 
1489   mat->insertmode = NOT_SET_VALUES;
1490   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1491   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1492 
1493   mat->rmap.bs = mat->cmap.bs = 1;
1494   ierr = PetscMapSetUp(&mat->rmap);CHKERRQ(ierr);
1495   ierr = PetscMapSetUp(&mat->cmap);CHKERRQ(ierr);
1496   a->nvec = mat->cmap.n;
1497 
1498   /* build cache for off array entries formed */
1499   a->donotstash = PETSC_FALSE;
1500   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1501 
1502   /* stuff used for matrix vector multiply */
1503   a->lvec        = 0;
1504   a->Mvctx       = 0;
1505   a->roworiented = PETSC_TRUE;
1506 
1507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1508                                      "MatGetDiagonalBlock_MPIDense",
1509                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1511                                      "MatMPIDenseSetPreallocation_MPIDense",
1512                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1514                                      "MatMatMult_MPIAIJ_MPIDense",
1515                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1517                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1518                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1520                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1521                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1522 #if defined(PETSC_HAVE_PLAPACK)
1523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatSetSolverType_mpidense_plapack_C",
1524                                      "MatSetSolverType_MPIDense_PLAPACK",
1525                                       MatSetSolverType_MPIDense_PLAPACK);CHKERRQ(ierr);
1526 #endif
1527   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1528 
1529 #if defined(PETSC_HAVE_PLAPACK)
1530   ierr = PetscNewLog(mat,Mat_Plapack,&lu);CHKERRQ(ierr);
1531   lu->CleanUpPlapack = PETSC_FALSE;
1532   mat->spptr         = (void*)lu;
1533 #endif
1534   PetscFunctionReturn(0);
1535 }
1536 EXTERN_C_END
1537 
1538 /*MC
1539    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1540 
1541    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1542    and MATMPIDENSE otherwise.
1543 
1544    Options Database Keys:
1545 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1546 
1547   Level: beginner
1548 
1549 
1550 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1551 M*/
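
A small sketch (editorial; assumes PetscInitialize() has been called) of the dispatch described above: the same MatSetType() call yields the sequential implementation on one process and the parallel one otherwise. Pairing both preallocation calls is the usual idiom; this assumes each silently returns when the matrix is of the other variant, as MatMPIDenseSetPreallocation() below does when its hook is not composed.

  Mat            A;
  PetscErrorCode ierr;

  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,10,10);CHKERRQ(ierr);
  ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);                      /* MATSEQDENSE on one process, MATMPIDENSE otherwise */
  ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);   /* ignored by the sequential variant */
  ierr = MatSeqDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);   /* ignored by the parallel variant */
  ierr = MatDestroy(A);CHKERRQ(ierr);
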
1552 
1553 EXTERN_C_BEGIN
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatCreate_Dense"
1556 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1557 {
1558   PetscErrorCode ierr;
1559   PetscMPIInt    size;
1560 
1561   PetscFunctionBegin;
1562   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1563   if (size == 1) {
1564     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1565   } else {
1566     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 EXTERN_C_END
1571 
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1574 /*@C
1575    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1576 
1577    Not collective
1578 
1579    Input Parameters:
1580 +  A - the matrix
1581 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1582    to control all matrix memory allocation.
1583 
1584    Notes:
1585    The dense format is fully compatible with standard Fortran 77
1586    storage by columns.
1587 
1588    The data input variable is intended primarily for Fortran programmers
1589    who wish to allocate their own matrix memory space.  Most users should
1590    set data=PETSC_NULL.
1591 
1592    Level: intermediate
1593 
1594 .keywords: matrix,dense, parallel
1595 
1596 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1597 @*/
1598 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1599 {
1600   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1601 
1602   PetscFunctionBegin;
1603   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1604   if (f) {
1605     ierr = (*f)(mat,data);CHKERRQ(ierr);
1606   }
1607   PetscFunctionReturn(0);
1608 }
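
An illustrative sketch of supplying user storage (editorial; the sizes and zero-fill are assumptions, not taken from the source). The local block is mat->rmap.n by mat->cmap.N in column-major (Fortran) order, so local entry (i,j) lives at data[i + j*m]; the sketch assumes the caller retains ownership of a user-provided array and frees it only after MatDestroy().

  Mat            A;
  PetscScalar    *data;
  PetscInt       m = 4,N = 10;                 /* local rows and global columns; illustrative values */
  PetscErrorCode ierr;

  ierr = PetscMalloc(m*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
  ierr = PetscMemzero(data,m*N*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);   /* the matrix now uses the caller's m*N column-major array */
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), use A ... */
  ierr = MatDestroy(A);CHKERRQ(ierr);
  ierr = PetscFree(data);CHKERRQ(ierr);
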
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatCreateMPIDense"
1612 /*@C
1613    MatCreateMPIDense - Creates a parallel matrix in dense format.
1614 
1615    Collective on MPI_Comm
1616 
1617    Input Parameters:
1618 +  comm - MPI communicator
1619 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1620 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1621 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1622 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1623 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1624    to control all matrix memory allocation.
1625 
1626    Output Parameter:
1627 .  A - the matrix
1628 
1629    Notes:
1630    The dense format is fully compatible with standard Fortran 77
1631    storage by columns.
1632 
1633    The data input variable is intended primarily for Fortran programmers
1634    who wish to allocate their own matrix memory space.  Most users should
1635    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1636 
1637    The user MUST specify either the local or global matrix dimensions
1638    (possibly both).
1639 
1640    Level: intermediate
1641 
1642 .keywords: matrix,dense, parallel
1643 
1644 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1645 @*/
1646 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1647 {
1648   PetscErrorCode ierr;
1649   PetscMPIInt    size;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1653   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1654   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1655   if (size > 1) {
1656     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1657     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1658   } else {
1659     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1660     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1661   }
1662   PetscFunctionReturn(0);
1663 }
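
A minimal sketch of the convenience routine above (editorial; assumes PetscInitialize() has been called). The global size is given and PETSC_DECIDE lets PETSc choose the local split; passing PETSC_NULL for data lets PETSc allocate the storage.

  Mat            A;
  PetscErrorCode ierr;

  ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), use A ... */
  ierr = MatDestroy(A);CHKERRQ(ierr);
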
1664 
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatDuplicate_MPIDense"
1667 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1668 {
1669   Mat            mat;
1670   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1671   PetscErrorCode ierr;
1672 
1673   PetscFunctionBegin;
1674   *newmat       = 0;
1675   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1676   ierr = MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
1677   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1678   a                 = (Mat_MPIDense*)mat->data;
1679   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1680   mat->factor       = A->factor;
1681   mat->assembled    = PETSC_TRUE;
1682   mat->preallocated = PETSC_TRUE;
1683 
1684   mat->rmap.rstart     = A->rmap.rstart;
1685   mat->rmap.rend       = A->rmap.rend;
1686   a->size              = oldmat->size;
1687   a->rank              = oldmat->rank;
1688   mat->insertmode      = NOT_SET_VALUES;
1689   a->nvec              = oldmat->nvec;
1690   a->donotstash        = oldmat->donotstash;
1691 
1692   ierr = PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1693   ierr = PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1694   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1695 
1696   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1697   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1698   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1699 
1700 #if defined(PETSC_HAVE_PLAPACK)
1701   ierr = PetscMemcpy(mat->spptr,A->spptr,sizeof(Mat_Plapack));CHKERRQ(ierr);
1702 #endif
1703   *newmat = mat;
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #include "petscsys.h"
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1711 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1712 {
1713   PetscErrorCode ierr;
1714   PetscMPIInt    rank,size;
1715   PetscInt       *rowners,i,m,nz,j;
1716   PetscScalar    *array,*vals,*vals_ptr;
1717   MPI_Status     status;
1718 
1719   PetscFunctionBegin;
1720   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1721   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1722 
1723   /* determine ownership of all rows */
1724   m          = M/size + ((M % size) > rank);
1725   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1726   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1727   rowners[0] = 0;
1728   for (i=2; i<=size; i++) {
1729     rowners[i] += rowners[i-1];
1730   }
1731 
1732   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1733   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1734   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1735   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1736   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1737 
1738   if (!rank) {
1739     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1740 
1741     /* read in my part of the matrix numerical values  */
1742     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1743 
1744     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1745     vals_ptr = vals;
1746     for (i=0; i<m; i++) {
1747       for (j=0; j<N; j++) {
1748         array[i + j*m] = *vals_ptr++;
1749       }
1750     }
1751 
1752     /* read in the other processes' parts and ship them out */
1753     for (i=1; i<size; i++) {
1754       nz   = (rowners[i+1] - rowners[i])*N;
1755       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1756       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1757     }
1758   } else {
1759     /* receive numeric values */
1760     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1761 
1762     /* receive message of values*/
1763     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1764 
1765     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1766     vals_ptr = vals;
1767     for (i=0; i<m; i++) {
1768       for (j=0; j<N; j++) {
1769         array[i + j*m] = *vals_ptr++;
1770       }
1771     }
1772   }
1773   ierr = PetscFree(rowners);CHKERRQ(ierr);
1774   ierr = PetscFree(vals);CHKERRQ(ierr);
1775   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1776   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
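
The copy loops above exist because the file stores each local block row by row while the dense matrix keeps its local array column by column (Fortran order). A self-contained illustration of that index mapping follows; the helper name is editorial, not part of the source.

  /* Copy an m-by-N block from row-major file order into column-major storage. */
  static void RowMajorToColMajor(PetscInt m,PetscInt N,const PetscScalar *vals,PetscScalar *array)
  {
    PetscInt i,j;
    for (i=0; i<m; i++) {
      for (j=0; j<N; j++) {
        array[i + j*m] = vals[i*N + j];   /* row-major offset i*N+j maps to column-major offset i+j*m */
      }
    }
  }
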
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatLoad_MPIDense"
1782 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1783 {
1784   Mat            A;
1785   PetscScalar    *vals,*svals;
1786   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1787   MPI_Status     status;
1788   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1789   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1790   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1791   PetscInt       i,nz,j,rstart,rend;
1792   int            fd;
1793   PetscErrorCode ierr;
1794 
1795   PetscFunctionBegin;
1796   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1798   if (!rank) {
1799     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1800     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1801     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1802   }
1803 
1804   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1805   M = header[1]; N = header[2]; nz = header[3];
1806 
1807   /*
1808        Handle case where matrix is stored on disk as a dense matrix
1809   */
1810   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1811     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1812     PetscFunctionReturn(0);
1813   }
1814 
1815   /* determine ownership of all rows */
1816   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1817   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1818   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1819   rowners[0] = 0;
1820   for (i=2; i<=size; i++) {
1821     rowners[i] += rowners[i-1];
1822   }
1823   rstart = rowners[rank];
1824   rend   = rowners[rank+1];
1825 
1826   /* distribute row lengths to all processors */
1827   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1828   offlens = ourlens + (rend-rstart);
1829   if (!rank) {
1830     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1831     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1832     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1833     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1834     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1835     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1836   } else {
1837     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1838   }
1839 
1840   if (!rank) {
1841     /* calculate the number of nonzeros on each processor */
1842     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1843     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1844     for (i=0; i<size; i++) {
1845       for (j=rowners[i]; j< rowners[i+1]; j++) {
1846         procsnz[i] += rowlengths[j];
1847       }
1848     }
1849     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1850 
1851     /* determine max buffer needed and allocate it */
1852     maxnz = 0;
1853     for (i=0; i<size; i++) {
1854       maxnz = PetscMax(maxnz,procsnz[i]);
1855     }
1856     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1857 
1858     /* read in my part of the matrix column indices  */
1859     nz = procsnz[0];
1860     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1861     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1862 
1863     /* read in everyone else's part and ship it off */
1864     for (i=1; i<size; i++) {
1865       nz   = procsnz[i];
1866       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1867       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1868     }
1869     ierr = PetscFree(cols);CHKERRQ(ierr);
1870   } else {
1871     /* determine buffer space needed for message */
1872     nz = 0;
1873     for (i=0; i<m; i++) {
1874       nz += ourlens[i];
1875     }
1876     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1877 
1878     /* receive message of column indices*/
1879     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1880     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1881     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1882   }
1883 
1884   /* loop over local rows, determining number of off diagonal entries */
1885   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1886   jj = 0;
1887   for (i=0; i<m; i++) {
1888     for (j=0; j<ourlens[i]; j++) {
1889       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1890       jj++;
1891     }
1892   }
1893 
1894   /* create our matrix */
1895   for (i=0; i<m; i++) {
1896     ourlens[i] -= offlens[i];
1897   }
1898   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1899   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1900   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1901   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1902   A = *newmat;
1903   for (i=0; i<m; i++) {
1904     ourlens[i] += offlens[i];
1905   }
1906 
1907   if (!rank) {
1908     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1909 
1910     /* read in my part of the matrix numerical values  */
1911     nz = procsnz[0];
1912     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1913 
1914     /* insert into matrix */
1915     jj      = rstart;
1916     smycols = mycols;
1917     svals   = vals;
1918     for (i=0; i<m; i++) {
1919       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1920       smycols += ourlens[i];
1921       svals   += ourlens[i];
1922       jj++;
1923     }
1924 
1925     /* read in the other processes' parts and ship them out */
1926     for (i=1; i<size; i++) {
1927       nz   = procsnz[i];
1928       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1929       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
1930     }
1931     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1932   } else {
1933     /* receive numeric values */
1934     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1935 
1936     /* receive message of values*/
1937     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
1938     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1939     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1940 
1941     /* insert into matrix */
1942     jj      = rstart;
1943     smycols = mycols;
1944     svals   = vals;
1945     for (i=0; i<m; i++) {
1946       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1947       smycols += ourlens[i];
1948       svals   += ourlens[i];
1949       jj++;
1950     }
1951   }
1952   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1953   ierr = PetscFree(vals);CHKERRQ(ierr);
1954   ierr = PetscFree(mycols);CHKERRQ(ierr);
1955   ierr = PetscFree(rowners);CHKERRQ(ierr);
1956 
1957   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatEqual_MPIDense"
1964 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1965 {
1966   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1967   Mat            a,b;
1968   PetscTruth     flg;
1969   PetscErrorCode ierr;
1970 
1971   PetscFunctionBegin;
1972   a = matA->A;
1973   b = matB->A;
1974   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1975   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #if defined(PETSC_HAVE_PLAPACK)
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
1983 /*@C
1984   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
1985   Level: developer
1986 
1987 .keywords: Petsc, destroy, package, PLAPACK
1988 .seealso: PetscFinalize()
1989 @*/
1990 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
1991 {
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PLA_Finalize();CHKERRQ(ierr);
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2001 /*@C
2002   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2003   called from PetscDLLibraryRegister() when using dynamic libraries, and on the call to PetscInitialize()
2004   when using static libraries.
2005 
2006   Input Parameter:
2007   path - The dynamic library path, or PETSC_NULL
2008 
2009   Level: developer
2010 
2011 .keywords: Petsc, initialize, package, PLAPACK
2012 .seealso: PetscInitializePackage(), PetscInitialize()
2013 @*/
2014 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(const char path[])
2015 {
2016   MPI_Comm       comm = PETSC_COMM_WORLD;
2017   PetscMPIInt    size;
2018   PetscErrorCode ierr;
2019 
2020   PetscFunctionBegin;
2021   if (!PLA_Initialized(PETSC_NULL)) {
2022 
2023     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2024     Plapack_nprows = 1;
2025     Plapack_npcols = size;
2026 
2027     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2028     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2029     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2030   }
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 
2035 #endif
2036