xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision b2a8bc5c908ef2af5b08c02a70bef9ae6ac1e274)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 EXTERN_C_BEGIN
13 #include "PLA.h"
14 EXTERN_C_END
15 
16 typedef struct {
17   PLA_Obj        A,pivots;
18   PLA_Template   templ;
19   MPI_Datatype   datatype;
20   PetscInt       nb,rstart;
21   VecScatter     ctx;
22   IS             is_pla,is_petsc;
23   PetscTruth     pla_solved;
24   MatStructure   mstruct;
25 } Mat_Plapack;
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatDenseGetLocalMatrix"
30 /*@
31 
32       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
33               stores the locally owned portion of the operator. For a MATSEQDENSE matrix, returns the matrix itself.
34 
35     Input Parameter:
36 .      A - the sequential or parallel (MPI) dense matrix
37 
38     Output Parameter:
39 .      B - the inner matrix
40 
41     Level: intermediate
42 
43 @*/
44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45 {
46   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
47   PetscErrorCode ierr;
48   PetscTruth     flg;
49 
50   PetscFunctionBegin;
51   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
52   if (flg) {
53     *B = mat->A;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
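
/*
   Editorial usage sketch for MatDenseGetLocalMatrix() above (not part of the original
   source; assumes an assembled dense matrix A and a declared PetscErrorCode ierr):

     Mat         Aloc;
     PetscScalar *array;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);   for MATMPIDENSE this is the per-process MATSEQDENSE block
     ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);          column-major storage of the locally owned rows
     ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/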
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatGetRow_MPIDense"
62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63 {
64   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
65   PetscErrorCode ierr;
66   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
67 
68   PetscFunctionBegin;
69   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
70   lrow = row - rstart;
71   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatRestoreRow_MPIDense"
77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
83   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
84   PetscFunctionReturn(0);
85 }
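
/*
   Editorial sketch of the MatGetRow()/MatRestoreRow() calling sequence for a MATMPIDENSE
   matrix (not part of the original source; A and ierr are assumed declared).  As the check
   in MatGetRow_MPIDense() enforces, only locally owned rows may be requested, so the loop
   runs over the ownership range:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (use the ncols column indices and values of this row here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/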
86 
87 EXTERN_C_BEGIN
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
91 {
92   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
93   PetscErrorCode ierr;
94   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
95   PetscScalar    *array;
96   MPI_Comm       comm;
97 
98   PetscFunctionBegin;
99   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
100 
101   /* The reuse aspect is not implemented efficiently */
102   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
103 
104   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
105   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
106   ierr = MatCreate(comm,B);CHKERRQ(ierr);
107   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
108   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
109   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
110   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
111   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
113 
114   *iscopy = PETSC_TRUE;
115   PetscFunctionReturn(0);
116 }
117 EXTERN_C_END
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatSetValues_MPIDense"
121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122 {
123   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
124   PetscErrorCode ierr;
125   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126   PetscTruth     roworiented = A->roworiented;
127 
128   PetscFunctionBegin;
129   for (i=0; i<m; i++) {
130     if (idxm[i] < 0) continue;
131     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
132     if (idxm[i] >= rstart && idxm[i] < rend) {
133       row = idxm[i] - rstart;
134       if (roworiented) {
135         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
136       } else {
137         for (j=0; j<n; j++) {
138           if (idxn[j] < 0) continue;
139           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
141         }
142       }
143     } else {
144       if (!A->donotstash) {
145         if (roworiented) {
146           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
147         } else {
148           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
149         }
150       }
151     }
152   }
153   PetscFunctionReturn(0);
154 }
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatGetValues_MPIDense"
158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
159 {
160   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
163 
164   PetscFunctionBegin;
165   for (i=0; i<m; i++) {
166     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
167     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
168     if (idxm[i] >= rstart && idxm[i] < rend) {
169       row = idxm[i] - rstart;
170       for (j=0; j<n; j++) {
171         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
172         if (idxn[j] >= mat->cmap->N) {
173           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
174         }
175         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
176       }
177     } else {
178       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
179     }
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatGetArray_MPIDense"
186 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
193   PetscFunctionReturn(0);
194 }
195 
196 #undef __FUNCT__
197 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
198 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
199 {
200   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
201   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
202   PetscErrorCode ierr;
203   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
204   const PetscInt *irow,*icol;
205   PetscScalar    *av,*bv,*v = lmat->v;
206   Mat            newmat;
207   IS             iscol_local;
208 
209   PetscFunctionBegin;
210   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
211   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
212   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
213   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
214   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
215   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
216 
217   /* No parallel redistribution is currently supported! Should really check each index set
218      to confirm that it is OK.  ... Currently supports only submatrices with the same
219      partitioning as the original matrix! */
220 
221   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
222   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
223 
224   /* Check submatrix call */
225   if (scall == MAT_REUSE_MATRIX) {
226     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
227     /* Really need to test row and column sizes! */
228     newmat = *B;
229   } else {
230     /* Create and fill new matrix */
231     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
232     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
233     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
234     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
235   }
236 
237   /* Now extract the data pointers and do the copy, column at a time */
238   newmatd = (Mat_MPIDense*)newmat->data;
239   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
240 
241   for (i=0; i<Ncols; i++) {
242     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
243     for (j=0; j<nrows; j++) {
244       *bv++ = av[irow[j] - rstart];
245     }
246   }
247 
248   /* Assemble the matrices so that the correct flags are set */
249   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
250   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251 
252   /* Free work space */
253   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
254   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
255   *B = newmat;
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatRestoreArray_MPIDense"
261 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
262 {
263   PetscFunctionBegin;
264   PetscFunctionReturn(0);
265 }
266 
267 #undef __FUNCT__
268 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
269 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
270 {
271   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
272   MPI_Comm       comm = ((PetscObject)mat)->comm;
273   PetscErrorCode ierr;
274   PetscInt       nstash,reallocs;
275   InsertMode     addv;
276 
277   PetscFunctionBegin;
278   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
279   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
280   if (addv == (ADD_VALUES|INSERT_VALUES)) {
281     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
282   }
283   mat->insertmode = addv; /* in case this processor had no cache */
284 
285   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
286   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
287   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
288   PetscFunctionReturn(0);
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
293 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
294 {
295   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
296   PetscErrorCode  ierr;
297   PetscInt        i,*row,*col,flg,j,rstart,ncols;
298   PetscMPIInt     n;
299   PetscScalar     *val;
300   InsertMode      addv=mat->insertmode;
301 
302   PetscFunctionBegin;
303   /*  wait on receives */
304   while (1) {
305     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
306     if (!flg) break;
307 
308     for (i=0; i<n;) {
309       /* Now identify the consecutive values belonging to the same row */
310       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
311       if (j < n) ncols = j-i;
312       else       ncols = n-i;
313       /* Now assemble all these values with a single function call */
314       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
315       i = j;
316     }
317   }
318   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
319 
320   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
321   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
322 
323   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
324     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
325   }
326   PetscFunctionReturn(0);
327 }
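
/*
   The stash logic above lets a process set entries it does not own; they are shipped to the
   owning process during assembly.  An editorial usage sketch (not part of the original
   source; A is a MATMPIDENSE matrix and ierr is declared):

     PetscInt    i = 0,j = 0;           global indices; they need not be locally owned
     PetscScalar v = 1.0;

     ierr = MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   scatters any stashed off-process entries
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     applies them via MatSetValues_MPIDense()
*/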
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatZeroEntries_MPIDense"
331 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
332 {
333   PetscErrorCode ierr;
334   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
335 
336   PetscFunctionBegin;
337   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 
341 /* the code does not handle the diagonal entries correctly unless the
342    matrix is square and the column and row ownerships are identical.
343    This is a BUG. The only way to fix it seems to be to access
344    mdn->A and mdn->B directly and not through the MatZeroRows()
345    routine.
346 */
347 #undef __FUNCT__
348 #define __FUNCT__ "MatZeroRows_MPIDense"
349 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
350 {
351   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
352   PetscErrorCode ierr;
353   PetscInt       i,*owners = A->rmap->range;
354   PetscInt       *nprocs,j,idx,nsends;
355   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
356   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
357   PetscInt       *lens,*lrows,*values;
358   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
359   MPI_Comm       comm = ((PetscObject)A)->comm;
360   MPI_Request    *send_waits,*recv_waits;
361   MPI_Status     recv_status,*send_status;
362   PetscTruth     found;
363 
364   PetscFunctionBegin;
365   /*  first count number of contributors to each processor */
366   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
367   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
368   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
369   for (i=0; i<N; i++) {
370     idx = rows[i];
371     found = PETSC_FALSE;
372     for (j=0; j<size; j++) {
373       if (idx >= owners[j] && idx < owners[j+1]) {
374         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
375       }
376     }
377     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
378   }
379   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
380 
381   /* inform other processors of number of messages and max length*/
382   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
383 
384   /* post receives:   */
385   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
386   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
387   for (i=0; i<nrecvs; i++) {
388     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
389   }
390 
391   /* do sends:
392       1) starts[i] gives the starting index in svalues for stuff going to
393          the ith processor
394   */
395   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
396   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
397   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
398   starts[0]  = 0;
399   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
400   for (i=0; i<N; i++) {
401     svalues[starts[owner[i]]++] = rows[i];
402   }
403 
404   starts[0] = 0;
405   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
406   count = 0;
407   for (i=0; i<size; i++) {
408     if (nprocs[2*i+1]) {
409       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
410     }
411   }
412   ierr = PetscFree(starts);CHKERRQ(ierr);
413 
414   base = owners[rank];
415 
416   /*  wait on receives */
417   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
418   source = lens + nrecvs;
419   count  = nrecvs; slen = 0;
420   while (count) {
421     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
422     /* unpack receives into our local space */
423     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
424     source[imdex]  = recv_status.MPI_SOURCE;
425     lens[imdex]    = n;
426     slen += n;
427     count--;
428   }
429   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
430 
431   /* convert the received global row indices into local row indices */
432   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
433   count = 0;
434   for (i=0; i<nrecvs; i++) {
435     values = rvalues + i*nmax;
436     for (j=0; j<lens[i]; j++) {
437       lrows[count++] = values[j] - base;
438     }
439   }
440   ierr = PetscFree(rvalues);CHKERRQ(ierr);
441   ierr = PetscFree(lens);CHKERRQ(ierr);
442   ierr = PetscFree(owner);CHKERRQ(ierr);
443   ierr = PetscFree(nprocs);CHKERRQ(ierr);
444 
445   /* actually zap the local rows */
446   ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
447   ierr = PetscFree(lrows);CHKERRQ(ierr);
448 
449   /* wait on sends */
450   if (nsends) {
451     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
452     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
453     ierr = PetscFree(send_status);CHKERRQ(ierr);
454   }
455   ierr = PetscFree(send_waits);CHKERRQ(ierr);
456   ierr = PetscFree(svalues);CHKERRQ(ierr);
457 
458   PetscFunctionReturn(0);
459 }
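
/*
   Editorial usage sketch for MatZeroRows() on a MATMPIDENSE matrix (not part of the original
   source).  Each process may pass global indices of rows it does not own; the routine above
   routes them to the owning processes and zeroes the local rows, placing diag on the diagonal
   (subject to the diagonal-entry caveat noted before this function):

     PetscInt    rows[2] = {0,5};       global row numbers, need not be locally owned
     PetscScalar diag    = 1.0;

     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/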
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatMult_MPIDense"
463 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
464 {
465   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
466   PetscErrorCode ierr;
467 
468   PetscFunctionBegin;
469   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatMultAdd_MPIDense"
477 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
478 {
479   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
480   PetscErrorCode ierr;
481 
482   PetscFunctionBegin;
483   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
484   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
485   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
486   PetscFunctionReturn(0);
487 }
488 
489 #undef __FUNCT__
490 #define __FUNCT__ "MatMultTranspose_MPIDense"
491 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
492 {
493   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
494   PetscErrorCode ierr;
495   PetscScalar    zero = 0.0;
496 
497   PetscFunctionBegin;
498   ierr = VecSet(yy,zero);CHKERRQ(ierr);
499   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
500   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
501   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
507 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
508 {
509   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
510   PetscErrorCode ierr;
511 
512   PetscFunctionBegin;
513   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
514   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
515   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
516   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
517   PetscFunctionReturn(0);
518 }
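
/*
   The four multiply routines above scatter the global input vector into the local work
   vector lvec (or scatter-add the local result back out for the transpose variants) and
   then call the sequential kernels.  An editorial driver sketch (not part of the original
   source), using MatGetVecs() to obtain vectors whose layouts conform to the matrix:

     Vec x,y;

     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);      x conforms to the columns, y to the rows
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);           y = A x
     ierr = MatMultTranspose(A,y,x);CHKERRQ(ierr);  x = A' y
     ierr = VecDestroy(x);CHKERRQ(ierr);
     ierr = VecDestroy(y);CHKERRQ(ierr);
*/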
519 
520 #undef __FUNCT__
521 #define __FUNCT__ "MatGetDiagonal_MPIDense"
522 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
523 {
524   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
525   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
526   PetscErrorCode ierr;
527   PetscInt       len,i,n,m = A->rmap->n,radd;
528   PetscScalar    *x,zero = 0.0;
529 
530   PetscFunctionBegin;
531   ierr = VecSet(v,zero);CHKERRQ(ierr);
532   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
533   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
534   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
535   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
536   radd = A->rmap->rstart*m;
537   for (i=0; i<len; i++) {
538     x[i] = aloc->v[radd + i*m + i];
539   }
540   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatDestroy_MPIDense"
546 PetscErrorCode MatDestroy_MPIDense(Mat mat)
547 {
548   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
549   PetscErrorCode ierr;
550 #if defined(PETSC_HAVE_PLAPACK)
551   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
552 #endif
553 
554   PetscFunctionBegin;
555 
556 #if defined(PETSC_USE_LOG)
557   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
558 #endif
559   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
560   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
561   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
562   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
563 #if defined(PETSC_HAVE_PLAPACK)
564   if (lu) {
565     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
566     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
567     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
568 
569     if (lu->is_pla) {
570       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
571       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
572       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
573     }
574   }
575 #endif
576 
577   ierr = PetscFree(mdn);CHKERRQ(ierr);
578   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
579   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
580   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
581   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
582   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatView_MPIDense_Binary"
589 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
590 {
591   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
592   PetscErrorCode    ierr;
593   PetscViewerFormat format;
594   int               fd;
595   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
596   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
597   PetscScalar       *work,*v,*vv;
598   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
599   MPI_Status        status;
600 
601   PetscFunctionBegin;
602   if (mdn->size == 1) {
603     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
604   } else {
605     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
606     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
607     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
608 
609     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
610     if (format == PETSC_VIEWER_NATIVE) {
611 
612       if (!rank) {
613         /* store the matrix as a dense matrix */
614         header[0] = MAT_FILE_COOKIE;
615         header[1] = mat->rmap->N;
616         header[2] = N;
617         header[3] = MATRIX_BINARY_FORMAT_DENSE;
618         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
619 
620         /* get largest work array needed for transposing array */
621         mmax = mat->rmap->n;
622         for (i=1; i<size; i++) {
623           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
624         }
625         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
626 
627         /* write out local array, by rows */
628         m    = mat->rmap->n;
629         v    = a->v;
630         for (j=0; j<N; j++) {
631           for (i=0; i<m; i++) {
632             work[j + i*N] = *v++;
633           }
634         }
635         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
636         /* get largest work array to receive messages from other processes, excludes process zero */
637         mmax = 0;
638         for (i=1; i<size; i++) {
639           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
640         }
641         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
642         for(k = 1; k < size; k++) {
643           v    = vv;
644           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
645           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
646 
647           for(j = 0; j < N; j++) {
648             for(i = 0; i < m; i++) {
649               work[j + i*N] = *v++;
650             }
651           }
652           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
653         }
654         ierr = PetscFree(work);CHKERRQ(ierr);
655         ierr = PetscFree(vv);CHKERRQ(ierr);
656       } else {
657         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       }
659     } else {
660       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE");
661     }
662   }
663   PetscFunctionReturn(0);
664 }
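
/*
   As the error message above indicates, a parallel dense matrix can only be written to a
   binary viewer after the native format has been selected.  An editorial sketch (not part
   of the original source; the file name is arbitrary):

     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/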
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
671   PetscErrorCode        ierr;
672   PetscMPIInt           size = mdn->size,rank = mdn->rank;
673   const PetscViewerType vtype;
674   PetscTruth            iascii,isdraw;
675   PetscViewer           sviewer;
676   PetscViewerFormat     format;
677 #if defined(PETSC_HAVE_PLAPACK)
678   Mat_Plapack           *lu;
679 #endif
680 
681   PetscFunctionBegin;
682   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
683   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
684   if (iascii) {
685     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
686     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
687     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
688       MatInfo info;
689       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
690       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
691                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
692       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
693 #if defined(PETSC_HAVE_PLAPACK)
694       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
695       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
696       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
697       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
698       if (mat->factor){
699         lu=(Mat_Plapack*)(mat->spptr);
700         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
701       }
702 #else
703       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
704 #endif
705       PetscFunctionReturn(0);
706     } else if (format == PETSC_VIEWER_ASCII_INFO) {
707       PetscFunctionReturn(0);
708     }
709   } else if (isdraw) {
710     PetscDraw  draw;
711     PetscTruth isnull;
712 
713     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
714     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
715     if (isnull) PetscFunctionReturn(0);
716   }
717 
718   if (size == 1) {
719     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
720   } else {
721     /* assemble the entire matrix onto first processor. */
722     Mat         A;
723     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
724     PetscInt    *cols;
725     PetscScalar *vals;
726 
727     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
728     if (!rank) {
729       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
730     } else {
731       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
732     }
733     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
734     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
735     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
736     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
737 
738     /* Copy the matrix ... This isn't the most efficient means,
739        but it's quick for now */
740     A->insertmode = INSERT_VALUES;
741     row = mat->rmap->rstart; m = mdn->A->rmap->n;
742     for (i=0; i<m; i++) {
743       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
744       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
745       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
746       row++;
747     }
748 
749     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
750     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
751     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
752     if (!rank) {
753       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
754     }
755     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
756     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
757     ierr = MatDestroy(A);CHKERRQ(ierr);
758   }
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatView_MPIDense"
764 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
765 {
766   PetscErrorCode ierr;
767   PetscTruth     iascii,isbinary,isdraw,issocket;
768 
769   PetscFunctionBegin;
770 
771   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
772   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
773   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
774   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
775 
776   if (iascii || issocket || isdraw) {
777     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
778   } else if (isbinary) {
779     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
780   } else {
781     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 #undef __FUNCT__
787 #define __FUNCT__ "MatGetInfo_MPIDense"
788 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
789 {
790   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
791   Mat            mdn = mat->A;
792   PetscErrorCode ierr;
793   PetscReal      isend[5],irecv[5];
794 
795   PetscFunctionBegin;
796   info->block_size     = 1.0;
797   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
798   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
799   isend[3] = info->memory;  isend[4] = info->mallocs;
800   if (flag == MAT_LOCAL) {
801     info->nz_used      = isend[0];
802     info->nz_allocated = isend[1];
803     info->nz_unneeded  = isend[2];
804     info->memory       = isend[3];
805     info->mallocs      = isend[4];
806   } else if (flag == MAT_GLOBAL_MAX) {
807     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
808     info->nz_used      = irecv[0];
809     info->nz_allocated = irecv[1];
810     info->nz_unneeded  = irecv[2];
811     info->memory       = irecv[3];
812     info->mallocs      = irecv[4];
813   } else if (flag == MAT_GLOBAL_SUM) {
814     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
815     info->nz_used      = irecv[0];
816     info->nz_allocated = irecv[1];
817     info->nz_unneeded  = irecv[2];
818     info->memory       = irecv[3];
819     info->mallocs      = irecv[4];
820   }
821   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
822   info->fill_ratio_needed = 0;
823   info->factor_mallocs    = 0;
824   PetscFunctionReturn(0);
825 }
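
/*
   A small editorial sketch of querying these statistics (not part of the original source).
   MAT_LOCAL returns this process's numbers, MAT_GLOBAL_MAX the per-field maxima over all
   processes, and MAT_GLOBAL_SUM the totals:

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"A uses %g bytes of memory\n",info.memory);CHKERRQ(ierr);
*/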
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatSetOption_MPIDense"
829 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
830 {
831   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
832   PetscErrorCode ierr;
833 
834   PetscFunctionBegin;
835   switch (op) {
836   case MAT_NEW_NONZERO_LOCATIONS:
837   case MAT_NEW_NONZERO_LOCATION_ERR:
838   case MAT_NEW_NONZERO_ALLOCATION_ERR:
839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
840     break;
841   case MAT_ROW_ORIENTED:
842     a->roworiented = flg;
843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
844     break;
845   case MAT_NEW_DIAGONALS:
846   case MAT_USE_HASH_TABLE:
847     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
848     break;
849   case MAT_IGNORE_OFF_PROC_ENTRIES:
850     a->donotstash = flg;
851     break;
852   case MAT_SYMMETRIC:
853   case MAT_STRUCTURALLY_SYMMETRIC:
854   case MAT_HERMITIAN:
855   case MAT_SYMMETRY_ETERNAL:
856   case MAT_IGNORE_LOWER_TRIANGULAR:
857     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
858     break;
859   default:
860     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
861   }
862   PetscFunctionReturn(0);
863 }
864 
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatDiagonalScale_MPIDense"
868 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
869 {
870   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
871   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
872   PetscScalar    *l,*r,x,*v;
873   PetscErrorCode ierr;
874   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
875 
876   PetscFunctionBegin;
877   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
878   if (ll) {
879     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
880     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
881     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
882     for (i=0; i<m; i++) {
883       x = l[i];
884       v = mat->v + i;
885       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
886     }
887     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
888     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
889   }
890   if (rr) {
891     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
892     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
893     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
896     for (i=0; i<n; i++) {
897       x = r[i];
898       v = mat->v + i*m;
899       for (j=0; j<m; j++) { (*v++) *= x;}
900     }
901     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
902     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
903   }
904   PetscFunctionReturn(0);
905 }
906 
907 #undef __FUNCT__
908 #define __FUNCT__ "MatNorm_MPIDense"
909 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
910 {
911   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
912   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
913   PetscErrorCode ierr;
914   PetscInt       i,j;
915   PetscReal      sum = 0.0;
916   PetscScalar    *v = mat->v;
917 
918   PetscFunctionBegin;
919   if (mdn->size == 1) {
920     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
921   } else {
922     if (type == NORM_FROBENIUS) {
923       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
924 #if defined(PETSC_USE_COMPLEX)
925         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
926 #else
927         sum += (*v)*(*v); v++;
928 #endif
929       }
930       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
931       *nrm = sqrt(*nrm);
932       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
933     } else if (type == NORM_1) {
934       PetscReal *tmp,*tmp2;
935       ierr = PetscMalloc(2*A->cmap->N*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
936       tmp2 = tmp + A->cmap->N;
937       ierr = PetscMemzero(tmp,2*A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
938       *nrm = 0.0;
939       v = mat->v;
940       for (j=0; j<mdn->A->cmap->n; j++) {
941         for (i=0; i<mdn->A->rmap->n; i++) {
942           tmp[j] += PetscAbsScalar(*v);  v++;
943         }
944       }
945       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
946       for (j=0; j<A->cmap->N; j++) {
947         if (tmp2[j] > *nrm) *nrm = tmp2[j];
948       }
949       ierr = PetscFree(tmp);CHKERRQ(ierr);
950       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
951     } else if (type == NORM_INFINITY) { /* max row norm */
952       PetscReal ntemp;
953       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
954       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
955     } else {
956       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
957     }
958   }
959   PetscFunctionReturn(0);
960 }
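
/*
   Editorial sketch of querying the norms computed above (not part of the original source).
   NORM_1 is the largest column sum, NORM_INFINITY the largest row sum, and NORM_FROBENIUS
   the square root of the sum of squares of all entries:

     PetscReal nrm1,nrmf,nrmi;

     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrmi);CHKERRQ(ierr);
*/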
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatTranspose_MPIDense"
964 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
965 {
966   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
967   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
968   Mat            B;
969   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
970   PetscErrorCode ierr;
971   PetscInt       j,i;
972   PetscScalar    *v;
973 
974   PetscFunctionBegin;
975   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
976   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
977     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
978     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
979     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
980     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
981   } else {
982     B = *matout;
983   }
984 
985   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
986   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
987   for (i=0; i<m; i++) rwork[i] = rstart + i;
988   for (j=0; j<n; j++) {
989     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
990     v   += m;
991   }
992   ierr = PetscFree(rwork);CHKERRQ(ierr);
993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
995   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
996     *matout = B;
997   } else {
998     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 
1004 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1005 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1009 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1010 {
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #if defined(PETSC_HAVE_PLAPACK)
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1022 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1023 {
1024   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1025   PetscErrorCode ierr;
1026   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1027   PetscScalar    *array;
1028   PetscReal      one = 1.0;
1029 
1030   PetscFunctionBegin;
1031   /* Copy A into F->lu->A */
1032   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1033   ierr = PLA_API_begin();CHKERRQ(ierr);
1034   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1035   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1036   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1037   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1038   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1039   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1040   ierr = PLA_API_end();CHKERRQ(ierr);
1041   lu->rstart = rstart;
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1047 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1048 {
1049   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1050   PetscErrorCode ierr;
1051   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1052   PetscScalar    *array;
1053   PetscReal      one = 1.0;
1054 
1055   PetscFunctionBegin;
1056   /* Copy F into A->lu->A */
1057   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1058   ierr = PLA_API_begin();CHKERRQ(ierr);
1059   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1060   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1061   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1062   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1063   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1064   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1065   ierr = PLA_API_end();CHKERRQ(ierr);
1066   lu->rstart = rstart;
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1072 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1073 {
1074   PetscErrorCode ierr;
1075   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1076   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1077   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1078   PLA_Obj        alpha = NULL,beta = NULL;
1079 
1080   PetscFunctionBegin;
1081   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1082   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1083 
1084   /*
1085   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1086   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1087   */
1088 
1089   /* do the multiply in PLA  */
1090   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1091   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1092   CHKMEMQ;
1093 
1094   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1095   CHKMEMQ;
1096   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1097   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1098 
1099   /*
1100   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1101   */
1102   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1108 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1109 {
1110   PetscErrorCode ierr;
1111   PetscInt       m=A->rmap->n,n=B->cmap->n;
1112   Mat            Cmat;
1113 
1114   PetscFunctionBegin;
1115   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1116   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1117   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1118   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1119   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1120   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1121   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1122 
1123   *C = Cmat;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1129 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1130 {
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   if (scall == MAT_INITIAL_MATRIX){
1135     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1136   }
1137   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
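
/*
   Editorial note on the calling sequence this interface serves (not part of the original
   source; the symbolic routine above currently raises an error, so the path is effectively
   disabled in this revision):

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);   symbolic then numeric product
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);     numeric product only, reusing C
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/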
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatSolve_MPIDense"
1143 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1144 {
1145   MPI_Comm       comm = ((PetscObject)A)->comm;
1146   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1147   PetscErrorCode ierr;
1148   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1149   PetscScalar    *array;
1150   PetscReal      one = 1.0;
1151   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1152   PLA_Obj        v_pla = NULL;
1153   PetscScalar    *loc_buf;
1154   Vec            loc_x;
1155 
1156   PetscFunctionBegin;
1157   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1158   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1159 
1160   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1161   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1162   PLA_Obj_set_to_zero(v_pla);
1163 
1164   /* Copy b into rhs_pla */
1165   PLA_API_begin();
1166   PLA_Obj_API_open(v_pla);
1167   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1168   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1169   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1170   PLA_Obj_API_close(v_pla);
1171   PLA_API_end();
1172 
1173   if (A->factor == MAT_FACTOR_LU){
1174     /* Apply the permutations to the right hand sides */
1175     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1176 
1177     /* Solve L y = b, overwriting b with y */
1178     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1179 
1180     /* Solve U x = y (=b), overwriting b with x */
1181     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1182   } else { /* MAT_FACTOR_CHOLESKY */
1183     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1184     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1185                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1186   }
1187 
1188   /* Copy PLAPACK x into Petsc vector x  */
1189   PLA_Obj_local_length(v_pla, &loc_m);
1190   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1191   PLA_Obj_local_stride(v_pla, &loc_stride);
1192   /*
1193     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1194   */
1195   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1196   if (!lu->pla_solved){
1197 
1198     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1199     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1200 
1201     /* Create the index sets and scatter context for the VecScatter */
1202     PLA_Obj_local_length(v_pla, &loc_m);
1203     PLA_Obj_local_stride(v_pla, &loc_stride);
1204     ierr = PetscMalloc((2*loc_m+1)*sizeof(PetscInt),&idx_pla);CHKERRQ(ierr);
1205     idx_petsc = idx_pla + loc_m;
1206 
1207     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1208     for (i=0; i<loc_m; i+=lu->nb){
1209       j = 0;
1210       while (j < lu->nb && i+j < loc_m){
1211         idx_petsc[i+j] = rstart + j; j++;
1212       }
1213       rstart += size*lu->nb;
1214     }
1215 
1216     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1217 
1218     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);CHKERRQ(ierr);
1219     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);CHKERRQ(ierr);
1220     ierr = PetscFree(idx_pla);CHKERRQ(ierr);
1221     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1222   }
1223   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1224   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225 
1226   /* Free data */
1227   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1228   PLA_Obj_free(&v_pla);
1229 
1230   lu->pla_solved = PETSC_TRUE;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1236 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1237 {
1238   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1239   PetscErrorCode ierr;
1240   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1241   PetscInt       info_pla=0;
1242   PetscScalar    *array,one = 1.0;
1243 
1244   PetscFunctionBegin;
1245   if (lu->mstruct == SAME_NONZERO_PATTERN){
1246     PLA_Obj_free(&lu->A);
1247     PLA_Obj_free (&lu->pivots);
1248   }
1249   /* Create PLAPACK matrix object */
1250   lu->A = NULL; lu->pivots = NULL;
1251   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1252   PLA_Obj_set_to_zero(lu->A);
1253   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1254 
1255   /* Copy A into lu->A */
1256   PLA_API_begin();
1257   PLA_Obj_API_open(lu->A);
1258   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1259   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1260   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1261   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1262   PLA_Obj_API_close(lu->A);
1263   PLA_API_end();
1264 
1265   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper with U */
1266   info_pla = PLA_LU(lu->A,lu->pivots);
1267   if (info_pla != 0)
1268     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1269 
1270   lu->rstart         = rstart;
1271   lu->mstruct        = SAME_NONZERO_PATTERN;
1272   F->ops->solve      = MatSolve_MPIDense;
1273   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1279 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1280 {
1281   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1282   PetscErrorCode ierr;
1283   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1284   PetscInt       info_pla=0;
1285   PetscScalar    *array,one = 1.0;
1286 
1287   PetscFunctionBegin;
1288   if (lu->mstruct == SAME_NONZERO_PATTERN){
1289     PLA_Obj_free(&lu->A);
1290   }
1291   /* Create PLAPACK matrix object */
1292   lu->A      = NULL;
1293   lu->pivots = NULL;
1294   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1295 
1296   /* Copy A into lu->A */
1297   PLA_API_begin();
1298   PLA_Obj_API_open(lu->A);
1299   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1300   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1301   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1302   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1303   PLA_Obj_API_close(lu->A);
1304   PLA_API_end();
1305 
1306   /* Compute the Cholesky factorization of A, overwriting its lower triangle with the factor */
1307   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1308   if (info_pla != 0)
1309     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1310 
1311   lu->rstart         = rstart;
1312   lu->mstruct        = SAME_NONZERO_PATTERN;
1313   F->ops->solve      = MatSolve_MPIDense;
1314   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 /* Note: the PETSc perm permutation is ignored */
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1321 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1322 {
1323   PetscErrorCode ierr;
1324   PetscTruth     issymmetric,set;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1328   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1329   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 /* Note: the PETSc r and c permutations are ignored */
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1336 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1337 {
1338   PetscErrorCode ierr;
1339   PetscInt       M = A->rmap->N;
1340   Mat_Plapack    *lu;
1341 
1342   PetscFunctionBegin;
1343   lu = (Mat_Plapack*)F->spptr;
1344   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1345   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 EXTERN_C_BEGIN
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1352 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1353 {
1354   PetscFunctionBegin;
1355   *type = MAT_SOLVER_PLAPACK;
1356   PetscFunctionReturn(0);
1357 }
1358 EXTERN_C_END
1359 
1360 EXTERN_C_BEGIN
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1363 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1364 {
1365   PetscErrorCode ierr;
1366   Mat_Plapack    *lu;
1367   PetscMPIInt    size;
1368   PetscInt       M=A->rmap->N;
1369 
1370   PetscFunctionBegin;
1371   /* Create the factorization matrix */
1372   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1373   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1374   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1375   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1376   (*F)->spptr = (void*)lu;
1377 
1378   /* Set default Plapack parameters */
1379   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1380   lu->nb = M/size;
1381   if (M - lu->nb*size) lu->nb++; /* round up so the blocks cover all rows (no cyclic distribution) */
1382 
1383   /* Set runtime options */
1384   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1385     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1386   PetscOptionsEnd();
1387 
1388   /* Create object distribution template */
1389   lu->templ = NULL;
1390   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1391 
1392   /* Set the datatype */
1393 #if defined(PETSC_USE_COMPLEX)
1394   lu->datatype = MPI_DOUBLE_COMPLEX;
1395 #else
1396   lu->datatype = MPI_DOUBLE;
1397 #endif
1398 
1399   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1400 
1401 
1402   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1403   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1404 
1405   if (ftype == MAT_FACTOR_LU) {
1406     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1407   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1408     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1409   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1410   (*F)->factor = ftype;
1411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 EXTERN_C_END
1415 #endif
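
/*
   Editorial sketch of the factor/solve sequence served by the PLAPACK interface above (not
   part of the original source; requires a PETSc build configured with PLAPACK).  Because the
   row and column permutations are ignored by this implementation, they are shown as
   PETSC_NULL here; b and x are obtained from the matrix so their layouts conform:

     Mat           F;
     MatFactorInfo info;
     Vec           b,x;

     ierr = MatGetVecs(A,&x,&b);CHKERRQ(ierr);
     ierr = VecSet(b,1.0);CHKERRQ(ierr);
     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,PETSC_NULL,PETSC_NULL,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);
     ierr = MatDestroy(F);CHKERRQ(ierr);
     ierr = VecDestroy(b);CHKERRQ(ierr);
     ierr = VecDestroy(x);CHKERRQ(ierr);
*/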
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1419 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1420 {
1421 #if defined(PETSC_HAVE_PLAPACK)
1422   PetscErrorCode ierr;
1423 #endif
1424 
1425   PetscFunctionBegin;
1426 #if defined(PETSC_HAVE_PLAPACK)
1427   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1428 #else
1429   SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1430 #endif
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatAXPY_MPIDense"
1436 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1437 {
1438   PetscErrorCode ierr;
1439   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 /* -------------------------------------------------------------------*/
1447 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1448        MatGetRow_MPIDense,
1449        MatRestoreRow_MPIDense,
1450        MatMult_MPIDense,
1451 /* 4*/ MatMultAdd_MPIDense,
1452        MatMultTranspose_MPIDense,
1453        MatMultTransposeAdd_MPIDense,
1454        0,
1455        0,
1456        0,
1457 /*10*/ 0,
1458        0,
1459        0,
1460        0,
1461        MatTranspose_MPIDense,
1462 /*15*/ MatGetInfo_MPIDense,
1463        MatEqual_MPIDense,
1464        MatGetDiagonal_MPIDense,
1465        MatDiagonalScale_MPIDense,
1466        MatNorm_MPIDense,
1467 /*20*/ MatAssemblyBegin_MPIDense,
1468        MatAssemblyEnd_MPIDense,
1469        MatSetOption_MPIDense,
1470        MatZeroEntries_MPIDense,
1471 /*24*/ MatZeroRows_MPIDense,
1472        0,
1473        0,
1474        0,
1475        0,
1476 /*29*/ MatSetUpPreallocation_MPIDense,
1477        0,
1478        0,
1479        MatGetArray_MPIDense,
1480        MatRestoreArray_MPIDense,
1481 /*34*/ MatDuplicate_MPIDense,
1482        0,
1483        0,
1484        0,
1485        0,
1486 /*39*/ MatAXPY_MPIDense,
1487        MatGetSubMatrices_MPIDense,
1488        0,
1489        MatGetValues_MPIDense,
1490        0,
1491 /*44*/ 0,
1492        MatScale_MPIDense,
1493        0,
1494        0,
1495        0,
1496 /*49*/ 0,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*54*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*59*/ MatGetSubMatrix_MPIDense,
1507        MatDestroy_MPIDense,
1508        MatView_MPIDense,
1509        0,
1510        0,
1511 /*64*/ 0,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*69*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*74*/ 0,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*79*/ 0,
1527        0,
1528        0,
1529        0,
1530 /*83*/ MatLoad_MPIDense,
1531        0,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*89*/
1537 #if defined(PETSC_HAVE_PLAPACK)
1538        MatMatMult_MPIDense_MPIDense,
1539        MatMatMultSymbolic_MPIDense_MPIDense,
1540        MatMatMultNumeric_MPIDense_MPIDense,
1541 #else
1542        0,
1543        0,
1544        0,
1545 #endif
1546        0,
1547 /*94*/ 0,
1548        0,
1549        0,
1550        0};
1551 
1552 EXTERN_C_BEGIN
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1555 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1556 {
1557   Mat_MPIDense   *a;
1558   PetscErrorCode ierr;
1559 
1560   PetscFunctionBegin;
1561   mat->preallocated = PETSC_TRUE;
1562   /* Note: for now, when data is provided by the caller, we assume the caller has correctly
1563    allocated the local dense storage space.  We should add error checking. */
1564 
1565   a    = (Mat_MPIDense*)mat->data;
1566   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1567   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1568   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1569   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1570   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 EXTERN_C_END
1574 
1575 /*MC
1576    MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1577 
1578   Run config/configure.py with the option --download-plapack to build PETSc with PLAPACK support.
1579 
1580 
1581   Options Database Keys:
1582 + -mat_plapack_nprows <n> - number of rows in the processor partition
1583 . -mat_plapack_npcols <n> - number of columns in the processor partition
1584 . -mat_plapack_nb <n> - block size of the template vector
1585 . -mat_plapack_nb_alg <n> - algorithmic block size
1586 - -mat_plapack_ckerror <n> - error checking flag
1587 
1588 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetMatSolverPackage(), MatSolverPackage
1589 
1590 M*/
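
/*
   A minimal usage sketch for the PLAPACK factorization described above (illustrative only; the
   variables A, b, x, row, col are placeholders for an assembled MATMPIDENSE matrix, work vectors,
   and ordering index sets, and error handling is abbreviated):

     Mat           A,F;
     Vec           b,x;
     IS            row,col;
     MatFactorInfo info;

     ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
     ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
     ierr = MatLUFactorSymbolic(F,A,row,col,&info);CHKERRQ(ierr);
     ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
     ierr = MatSolve(F,b,x);CHKERRQ(ierr);

   When going through KSP/PC the same solver can typically be requested at run time with options
   such as -pc_type lu -pc_factor_mat_solver_package plapack.
*/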
1591 
1592 EXTERN_C_BEGIN
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatCreate_MPIDense"
1595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1596 {
1597   Mat_MPIDense   *a;
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1602   mat->data         = (void*)a;
1603   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1604   mat->mapping      = 0;
1605 
1606   mat->insertmode = NOT_SET_VALUES;
1607   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1608   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1609 
1610   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1611   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1612   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1613   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1614   a->nvec = mat->cmap->n;
1615 
1616   /* build cache for off array entries formed */
1617   a->donotstash = PETSC_FALSE;
1618   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1619 
1620   /* stuff used for matrix vector multiply */
1621   a->lvec        = 0;
1622   a->Mvctx       = 0;
1623   a->roworiented = PETSC_TRUE;
1624 
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1626                                      "MatGetDiagonalBlock_MPIDense",
1627                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1629                                      "MatMPIDenseSetPreallocation_MPIDense",
1630                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1632                                      "MatMatMult_MPIAIJ_MPIDense",
1633                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1635                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1636                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1637   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1638                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1639                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1640   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1641                                      "MatGetFactor_mpidense_petsc",
1642                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1643 #if defined(PETSC_HAVE_PLAPACK)
1644   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1645                                      "MatGetFactor_mpidense_plapack",
1646                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1647   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1648 #endif
1649   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1650 
1651   PetscFunctionReturn(0);
1652 }
1653 EXTERN_C_END
1654 
1655 /*MC
1656    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1657 
1658    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1659    and MATMPIDENSE otherwise.
1660 
1661    Options Database Keys:
1662 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1663 
1664   Level: beginner
1665 
1666 
1667 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1668 M*/
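
/*
   A small usage sketch for the MATDENSE type described above (illustrative only; M and N are
   placeholder global dimensions): the concrete type is chosen when MatSetFromOptions() is called,
   for example with -mat_type dense on the command line.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/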
1669 
1670 EXTERN_C_BEGIN
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatCreate_Dense"
1673 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1674 {
1675   PetscErrorCode ierr;
1676   PetscMPIInt    size;
1677 
1678   PetscFunctionBegin;
1679   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1680   if (size == 1) {
1681     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1682   } else {
1683     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 EXTERN_C_END
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1691 /*@C
1692    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1693 
1694    Not collective
1695 
1696    Input Parameters:
1697 +  A - the matrix
1698 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1699    to control all matrix memory allocation.
1700 
1701    Notes:
1702    The dense format is fully compatible with standard Fortran 77
1703    storage by columns.
1704 
1705    The data input variable is intended primarily for Fortran programmers
1706    who wish to allocate their own matrix memory space.  Most users should
1707    set data=PETSC_NULL.
1708 
1709    Level: intermediate
1710 
1711 .keywords: matrix,dense, parallel
1712 
1713 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1714 @*/
1715 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1716 {
1717   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1718 
1719   PetscFunctionBegin;
1720   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1721   if (f) {
1722     ierr = (*f)(mat,data);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
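
/*
   A minimal calling sketch for MatMPIDenseSetPreallocation() (illustrative only; comm, m, n, M, N
   are placeholders): create the matrix, set its type, then pass PETSC_NULL so that PETSc allocates
   the local dense storage itself.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/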
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCreateMPIDense"
1729 /*@C
1730    MatCreateMPIDense - Creates a parallel matrix in dense format.
1731 
1732    Collective on MPI_Comm
1733 
1734    Input Parameters:
1735 +  comm - MPI communicator
1736 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1737 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1738 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1739 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1740 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1741    to control all matrix memory allocation.
1742 
1743    Output Parameter:
1744 .  A - the matrix
1745 
1746    Notes:
1747    The dense format is fully compatible with standard Fortran 77
1748    storage by columns.
1749 
1750    The data input variable is intended primarily for Fortran programmers
1751    who wish to allocate their own matrix memory space.  Most users should
1752    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1753 
1754    The user MUST specify either the local or global matrix dimensions
1755    (possibly both).
1756 
1757    Level: intermediate
1758 
1759 .keywords: matrix,dense, parallel
1760 
1761 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1762 @*/
1763 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1764 {
1765   PetscErrorCode ierr;
1766   PetscMPIInt    size;
1767 
1768   PetscFunctionBegin;
1769   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1770   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1771   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1772   if (size > 1) {
1773     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1774     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1775   } else {
1776     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1777     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1778   }
1779   PetscFunctionReturn(0);
1780 }
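
/*
   Example usage of MatCreateMPIDense() (illustrative only; M and N are placeholder global
   dimensions): create an M x N parallel dense matrix, letting PETSc choose the local sizes and
   manage the storage.

     Mat A;
     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
*/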
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatDuplicate_MPIDense"
1784 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1785 {
1786   Mat            mat;
1787   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1788   PetscErrorCode ierr;
1789 
1790   PetscFunctionBegin;
1791   *newmat       = 0;
1792   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1793   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1794   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1795   a                 = (Mat_MPIDense*)mat->data;
1796   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1797   mat->factor       = A->factor;
1798   mat->assembled    = PETSC_TRUE;
1799   mat->preallocated = PETSC_TRUE;
1800 
1801   mat->rmap->rstart     = A->rmap->rstart;
1802   mat->rmap->rend       = A->rmap->rend;
1803   a->size              = oldmat->size;
1804   a->rank              = oldmat->rank;
1805   mat->insertmode      = NOT_SET_VALUES;
1806   a->nvec              = oldmat->nvec;
1807   a->donotstash        = oldmat->donotstash;
1808 
1809   ierr = PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1810   ierr = PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
1811   ierr = MatStashCreate_Private(((PetscObject)A)->comm,1,&mat->stash);CHKERRQ(ierr);
1812 
1813   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1814   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1815   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1816 
1817   *newmat = mat;
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #include "petscsys.h"
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1825 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1826 {
1827   PetscErrorCode ierr;
1828   PetscMPIInt    rank,size;
1829   PetscInt       *rowners,i,m,nz,j;
1830   PetscScalar    *array,*vals,*vals_ptr;
1831   MPI_Status     status;
1832 
1833   PetscFunctionBegin;
1834   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1835   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1836 
1837   /* determine ownership of all rows */
1838   m          = M/size + ((M % size) > rank);
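  /* e.g., M = 10 rows on size = 4 processes gives local row counts 3,3,2,2 on ranks 0,1,2,3 */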
1839   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1840   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1841   rowners[0] = 0;
1842   for (i=2; i<=size; i++) {
1843     rowners[i] += rowners[i-1];
1844   }
1845 
1846   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
1847   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1848   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
1849   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
1850   ierr = MatGetArray(*newmat,&array);CHKERRQ(ierr);
1851 
1852   if (!rank) {
1853     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1854 
1855     /* read in my part of the matrix numerical values  */
1856     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1857 
1858     /* insert into the matrix row by row: the file stores values by rows while the local dense array is column-major, so we cannot read directly into array */
1859     vals_ptr = vals;
1860     for (i=0; i<m; i++) {
1861       for (j=0; j<N; j++) {
1862         array[i + j*m] = *vals_ptr++;
1863       }
1864     }
1865 
1866     /* read in the other processes' parts and ship them out */
1867     for (i=1; i<size; i++) {
1868       nz   = (rowners[i+1] - rowners[i])*N;
1869       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1870       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);CHKERRQ(ierr);
1871     }
1872   } else {
1873     /* receive numeric values */
1874     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1875 
1876     /* receive message of values */
1877     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);CHKERRQ(ierr);
1878 
1879     /* insert into the matrix row by row: the file stores values by rows while the local dense array is column-major, so we cannot read directly into array */
1880     vals_ptr = vals;
1881     for (i=0; i<m; i++) {
1882       for (j=0; j<N; j++) {
1883         array[i + j*m] = *vals_ptr++;
1884       }
1885     }
1886   }
1887   ierr = PetscFree(rowners);CHKERRQ(ierr);
1888   ierr = PetscFree(vals);CHKERRQ(ierr);
1889   ierr = MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1890   ierr = MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatLoad_MPIDense"
1896 PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1897 {
1898   Mat            A;
1899   PetscScalar    *vals,*svals;
1900   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1901   MPI_Status     status;
1902   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1903   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1904   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1905   PetscInt       i,nz,j,rstart,rend;
1906   int            fd;
1907   PetscErrorCode ierr;
1908 
1909   PetscFunctionBegin;
1910   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1911   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1912   if (!rank) {
1913     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1914     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1915     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1916   }
1917 
1918   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1919   M = header[1]; N = header[2]; nz = header[3];
1920 
1921   /*
1922        Handle case where matrix is stored on disk as a dense matrix
1923   */
1924   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1925     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);CHKERRQ(ierr);
1926     PetscFunctionReturn(0);
1927   }
1928 
1929   /* determine ownership of all rows */
1930   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1931   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
1932   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1933   rowners[0] = 0;
1934   for (i=2; i<=size; i++) {
1935     rowners[i] += rowners[i-1];
1936   }
1937   rstart = rowners[rank];
1938   rend   = rowners[rank+1];
1939 
1940   /* distribute row lengths to all processors */
1941   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1942   offlens = ourlens + (rend-rstart);
1943   if (!rank) {
1944     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1945     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1946     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1947     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1948     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1949     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1950   } else {
1951     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1952   }
1953 
1954   if (!rank) {
1955     /* calculate the number of nonzeros on each processor */
1956     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1957     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1958     for (i=0; i<size; i++) {
1959       for (j=rowners[i]; j< rowners[i+1]; j++) {
1960         procsnz[i] += rowlengths[j];
1961       }
1962     }
1963     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1964 
1965     /* determine max buffer needed and allocate it */
1966     maxnz = 0;
1967     for (i=0; i<size; i++) {
1968       maxnz = PetscMax(maxnz,procsnz[i]);
1969     }
1970     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1971 
1972     /* read in my part of the matrix column indices  */
1973     nz = procsnz[0];
1974     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1975     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1976 
1977     /* read in everyone else's part and ship it off */
1978     for (i=1; i<size; i++) {
1979       nz   = procsnz[i];
1980       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1981       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1982     }
1983     ierr = PetscFree(cols);CHKERRQ(ierr);
1984   } else {
1985     /* determine buffer space needed for message */
1986     nz = 0;
1987     for (i=0; i<m; i++) {
1988       nz += ourlens[i];
1989     }
1990     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1991 
1992     /* receive message of column indices*/
1993     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1994     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1995     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1996   }
1997 
1998   /* loop over local rows, determining number of off diagonal entries */
1999   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2000   jj = 0;
2001   for (i=0; i<m; i++) {
2002     for (j=0; j<ourlens[i]; j++) {
2003       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2004       jj++;
2005     }
2006   }
2007 
2008   /* create our matrix */
2009   for (i=0; i<m; i++) {
2010     ourlens[i] -= offlens[i];
2011   }
2012   ierr = MatCreate(comm,newmat);CHKERRQ(ierr);
2013   ierr = MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2014   ierr = MatSetType(*newmat,type);CHKERRQ(ierr);
2015   ierr = MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);CHKERRQ(ierr);
2016   A = *newmat;
2017   for (i=0; i<m; i++) {
2018     ourlens[i] += offlens[i];
2019   }
2020 
2021   if (!rank) {
2022     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2023 
2024     /* read in my part of the matrix numerical values  */
2025     nz = procsnz[0];
2026     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2027 
2028     /* insert into matrix */
2029     jj      = rstart;
2030     smycols = mycols;
2031     svals   = vals;
2032     for (i=0; i<m; i++) {
2033       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2034       smycols += ourlens[i];
2035       svals   += ourlens[i];
2036       jj++;
2037     }
2038 
2039     /* read in the other processes' parts and ship them out */
2040     for (i=1; i<size; i++) {
2041       nz   = procsnz[i];
2042       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2043       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2044     }
2045     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2046   } else {
2047     /* receive numeric values */
2048     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2049 
2050     /* receive message of values */
2051     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2052     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2053     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2054 
2055     /* insert into matrix */
2056     jj      = rstart;
2057     smycols = mycols;
2058     svals   = vals;
2059     for (i=0; i<m; i++) {
2060       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2061       smycols += ourlens[i];
2062       svals   += ourlens[i];
2063       jj++;
2064     }
2065   }
2066   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2067   ierr = PetscFree(vals);CHKERRQ(ierr);
2068   ierr = PetscFree(mycols);CHKERRQ(ierr);
2069   ierr = PetscFree(rowners);CHKERRQ(ierr);
2070 
2071   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2072   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatEqual_MPIDense"
2078 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2079 {
2080   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2081   Mat            a,b;
2082   PetscTruth     flg;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   a = matA->A;
2087   b = matB->A;
2088   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2089   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #if defined(PETSC_HAVE_PLAPACK)
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2097 /*@C
2098   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2099   Level: developer
2100 
2101 .keywords: Petsc, destroy, package, PLAPACK
2102 .seealso: PetscFinalize()
2103 @*/
2104 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2105 {
2106   PetscErrorCode ierr;
2107 
2108   PetscFunctionBegin;
2109   ierr = PLA_Finalize();CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2115 /*@C
2116   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2117   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2118 
2119   Input Parameter:
2120 .   comm - the communicator the matrix lives on
2121 
2122   Level: developer
2123 
2124    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2125   same communicator (because there is some global state that is initialized and used for all matrices). In addition,
2126   if PLAPACK is initialized on subcommunicators of MPI_COMM_WORLD (that is, the initial matrices are created on them),
2127   those subcommunicators cannot overlap.
2128 
2129 .keywords: Petsc, initialize, package, PLAPACK
2130 .seealso: PetscInitializePackage(), PetscInitialize()
2131 @*/
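/*
   Example runtime options for the 2D processor mesh configured below (illustrative only): on 4
   processes one might run with -mat_plapack_nprows 2 -mat_plapack_npcols 2; presumably the product
   nprows*npcols must match the number of processes in the communicator handed to PLA_Comm_1D_to_2D().
*/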
2132 PetscErrorCode PETSC_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2133 {
2134   PetscMPIInt    size;
2135   PetscErrorCode ierr;
2136 
2137   PetscFunctionBegin;
2138   if (!PLA_Initialized(PETSC_NULL)) {
2139 
2140     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2141     Plapack_nprows = 1;
2142     Plapack_npcols = size;
2143 
2144     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2145       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2146       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2147 #if defined(PETSC_USE_DEBUG)
2148       Plapack_ierror = 3;
2149 #else
2150       Plapack_ierror = 0;
2151 #endif
2152       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2153       if (Plapack_ierror){
2154 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2155       } else {
2156 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2157       }
2158 
2159       Plapack_nb_alg = 0;
2160       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2161       if (Plapack_nb_alg) {
2162         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2163       }
2164     PetscOptionsEnd();
2165 
2166     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2167     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2168     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2169   }
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #endif
2174