xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 09573ac72a50d3e7ecd55a2b7f0ef28450cd0a8b)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 EXTERN_C_BEGIN
13 #include "PLA.h"
14 EXTERN_C_END
15 
16 typedef struct {
17   PLA_Obj        A,pivots;
18   PLA_Template   templ;
19   MPI_Datatype   datatype;
20   PetscInt       nb,rstart;
21   VecScatter     ctx;
22   IS             is_pla,is_petsc;
23   PetscBool      pla_solved;
24   MatStructure   mstruct;
25 } Mat_Plapack;
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatDenseGetLocalMatrix"
30 /*@
31 
32       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the inner sequential matrix
33               that stores this process's rows of the operator; for a MATSEQDENSE matrix it returns the matrix itself.
34 
35     Input Parameter:
36 .      A - the Seq or MPI dense matrix
37 
38     Output Parameter:
39 .      B - the inner sequential (MATSEQDENSE) matrix
40 
41     Level: intermediate
42 
43 @*/
44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45 {
46   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
47   PetscErrorCode ierr;
48   PetscBool      flg;
49 
50   PetscFunctionBegin;
51   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
52   if (flg) {
53     *B = mat->A;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
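/*
   A minimal usage sketch (not part of this file; the names A, Aloc and array are illustrative):
   obtain the local MATSEQDENSE block of a parallel dense matrix and access its raw column-major
   storage.

     Mat         Aloc;
     PetscScalar *array;

     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);
     ... work with this process's rows, stored column major ...
     ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/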
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatGetRow_MPIDense"
62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63 {
64   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
65   PetscErrorCode ierr;
66   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
67 
68   PetscFunctionBegin;
69   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
70   lrow = row - rstart;
71   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatRestoreRow_MPIDense"
77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
83   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
84   PetscFunctionReturn(0);
85 }
86 
87 EXTERN_C_BEGIN
88 #undef __FUNCT__
89 #define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
90 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *B)
91 {
92   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
93   PetscErrorCode ierr;
94   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
95   PetscScalar    *array;
96   MPI_Comm       comm;
97 
98   PetscFunctionBegin;
99   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
100 
101   /* The reuse aspect is not implemented efficiently */
102   if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}
103 
104   ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
105   ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
106   ierr = MatCreate(comm,B);CHKERRQ(ierr);
107   ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
108   ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
109   ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
110   ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
111   ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
112   ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
113 
114   *iscopy = PETSC_TRUE;
115   PetscFunctionReturn(0);
116 }
117 EXTERN_C_END
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatSetValues_MPIDense"
121 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122 {
123   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
124   PetscErrorCode ierr;
125   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126   PetscBool      roworiented = A->roworiented;
127 
128   PetscFunctionBegin;
129   if (v) PetscValidScalarPointer(v,6);
130   for (i=0; i<m; i++) {
131     if (idxm[i] < 0) continue;
132     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
133     if (idxm[i] >= rstart && idxm[i] < rend) {
134       row = idxm[i] - rstart;
135       if (roworiented) {
136         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
137       } else {
138         for (j=0; j<n; j++) {
139           if (idxn[j] < 0) continue;
140           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
141           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
142         }
143       }
144     } else {
145       if (!A->donotstash) {
146         if (roworiented) {
147           ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
148         } else {
149           ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
150         }
151       }
152     }
153   }
154   PetscFunctionReturn(0);
155 }
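/*
   Values destined for rows owned by another process are stashed above and only delivered during
   MatAssemblyBegin/End(). A minimal calling sketch (illustrative names; any process may set any
   global row unless MAT_IGNORE_OFF_PROC_ENTRIES is set):

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&value,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/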
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetValues_MPIDense"
159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160 {
161   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
164 
165   PetscFunctionBegin;
166   for (i=0; i<m; i++) {
167     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169     if (idxm[i] >= rstart && idxm[i] < rend) {
170       row = idxm[i] - rstart;
171       for (j=0; j<n; j++) {
172         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
174         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
175       }
176     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
177   }
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatGetArray_MPIDense"
183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
184 {
185   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
186   PetscErrorCode ierr;
187 
188   PetscFunctionBegin;
189   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
190   PetscFunctionReturn(0);
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
196 {
197   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
198   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
199   PetscErrorCode ierr;
200   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
201   const PetscInt *irow,*icol;
202   PetscScalar    *av,*bv,*v = lmat->v;
203   Mat            newmat;
204   IS             iscol_local;
205 
206   PetscFunctionBegin;
207   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
208   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
209   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
210   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
211   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
212   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
213 
214   /* No parallel redistribution is currently supported! Each index set should really be checked
215      to confirm that it is OK; currently the submatrix must have the same row partitioning as the
216      original matrix! */
217 
218   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
219   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
220 
221   /* Check submatrix call */
222   if (scall == MAT_REUSE_MATRIX) {
223     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
224     /* Really need to test rows and column sizes! */
225     newmat = *B;
226   } else {
227     /* Create and fill new matrix */
228     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
229     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
230     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
231     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
232   }
233 
234   /* Now extract the data pointers and do the copy, column at a time */
235   newmatd = (Mat_MPIDense*)newmat->data;
236   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
237 
238   for (i=0; i<Ncols; i++) {
239     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
240     for (j=0; j<nrows; j++) {
241       *bv++ = av[irow[j] - rstart];
242     }
243   }
244 
245   /* Assemble the matrices so that the correct flags are set */
246   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248 
249   /* Free work space */
250   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
251   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
252   *B = newmat;
253   PetscFunctionReturn(0);
254 }
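/*
   Because no parallel redistribution is done above, isrow must select only locally owned rows and
   keep the original row partitioning. A sketch of a conforming call for a square matrix A
   (illustrative names; here each process simply requests its own row range for both rows and columns):

     PetscInt rstart,rend;
     IS       isrow,iscol;
     Mat      B;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/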
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatRestoreArray_MPIDense"
258 PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
259 {
260   PetscFunctionBegin;
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatAssemblyBegin_MPIDense"
266 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
267 {
268   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
269   MPI_Comm       comm = ((PetscObject)mat)->comm;
270   PetscErrorCode ierr;
271   PetscInt       nstash,reallocs;
272   InsertMode     addv;
273 
274   PetscFunctionBegin;
275   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
276   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
277   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
278   mat->insertmode = addv; /* in case this processor had no cache */
279 
280   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
281   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
282   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "MatAssemblyEnd_MPIDense"
288 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
289 {
290   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
291   PetscErrorCode  ierr;
292   PetscInt        i,*row,*col,flg,j,rstart,ncols;
293   PetscMPIInt     n;
294   PetscScalar     *val;
295   InsertMode      addv=mat->insertmode;
296 
297   PetscFunctionBegin;
298   /*  wait on receives */
299   while (1) {
300     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
301     if (!flg) break;
302 
303     for (i=0; i<n;) {
304       /* Now identify the consecutive vals belonging to the same row */
305       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
306       if (j < n) ncols = j-i;
307       else       ncols = n-i;
308       /* Now assemble all these values with a single function call */
309       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
310       i = j;
311     }
312   }
313   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
314 
315   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
316   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
317 
318   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
319     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatZeroEntries_MPIDense"
326 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
327 {
328   PetscErrorCode ierr;
329   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
330 
331   PetscFunctionBegin;
332   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 /* The code does not handle the diagonal entries correctly unless the
337    matrix is square and the column and row ownerships are identical.
338    This is a BUG. The only way to fix it seems to be to access
339    mdn->A directly and not to go through the MatZeroRows()
340    routine.
341 */
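/*
   A collective usage sketch (illustrative names and indices): each process passes the global
   indices of the rows it wants zeroed (they need not be locally owned; they are routed to their
   owners below), and optionally supplies x and b so that b is fixed up as b[row] = diag*x[row].

     PetscInt rows[] = {0,5};                       hypothetical global row indices
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/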
342 #undef __FUNCT__
343 #define __FUNCT__ "MatZeroRows_MPIDense"
344 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
345 {
346   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
347   PetscErrorCode    ierr;
348   PetscInt          i,*owners = A->rmap->range;
349   PetscInt          *nprocs,j,idx,nsends;
350   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
351   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
352   PetscInt          *lens,*lrows,*values;
353   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
354   MPI_Comm          comm = ((PetscObject)A)->comm;
355   MPI_Request       *send_waits,*recv_waits;
356   MPI_Status        recv_status,*send_status;
357   PetscBool         found;
358   const PetscScalar *xx;
359   PetscScalar       *bb;
360 
361   PetscFunctionBegin;
362   /*  first count number of contributors to each processor */
363   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
364   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
365   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
366   for (i=0; i<N; i++) {
367     idx = rows[i];
368     found = PETSC_FALSE;
369     for (j=0; j<size; j++) {
370       if (idx >= owners[j] && idx < owners[j+1]) {
371         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
372       }
373     }
374     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
375   }
376   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
377 
378   /* inform other processors of number of messages and max length*/
379   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
380 
381   /* post receives:   */
382   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
383   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
384   for (i=0; i<nrecvs; i++) {
385     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
386   }
387 
388   /* do sends:
389       1) starts[i] gives the starting index in svalues for stuff going to
390          the ith processor
391   */
392   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
393   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
394   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
395   starts[0]  = 0;
396   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
397   for (i=0; i<N; i++) {
398     svalues[starts[owner[i]]++] = rows[i];
399   }
400 
401   starts[0] = 0;
402   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
403   count = 0;
404   for (i=0; i<size; i++) {
405     if (nprocs[2*i+1]) {
406       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
407     }
408   }
409   ierr = PetscFree(starts);CHKERRQ(ierr);
410 
411   base = owners[rank];
412 
413   /*  wait on receives */
414   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
415   count  = nrecvs;
416   slen   = 0;
417   while (count) {
418     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
419     /* unpack receives into our local space */
420     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
421     source[imdex]  = recv_status.MPI_SOURCE;
422     lens[imdex]    = n;
423     slen += n;
424     count--;
425   }
426   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
427 
428   /* move the received global row indices into the local row list */
429   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
430   count = 0;
431   for (i=0; i<nrecvs; i++) {
432     values = rvalues + i*nmax;
433     for (j=0; j<lens[i]; j++) {
434       lrows[count++] = values[j] - base;
435     }
436   }
437   ierr = PetscFree(rvalues);CHKERRQ(ierr);
438   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
439   ierr = PetscFree(owner);CHKERRQ(ierr);
440   ierr = PetscFree(nprocs);CHKERRQ(ierr);
441 
442   /* fix right hand side if needed */
443   if (x && b) {
444     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
445     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
446     for (i=0; i<slen; i++) {
447       bb[lrows[i]] = diag*xx[lrows[i]];
448     }
449     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
450     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
451   }
452 
453   /* actually zap the local rows */
454   ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
455   ierr = PetscFree(lrows);CHKERRQ(ierr);
456 
457   /* wait on sends */
458   if (nsends) {
459     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
460     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
461     ierr = PetscFree(send_status);CHKERRQ(ierr);
462   }
463   ierr = PetscFree(send_waits);CHKERRQ(ierr);
464   ierr = PetscFree(svalues);CHKERRQ(ierr);
465 
466   PetscFunctionReturn(0);
467 }
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatMult_MPIDense"
471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
472 {
473   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
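  /* every process needs the entire input vector for a dense multiply: gather xx into the
     local work vector lvec, then apply the locally stored rows: yy = mdn->A * lvec */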
477   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatMultAdd_MPIDense"
485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
488   PetscErrorCode ierr;
489 
490   PetscFunctionBegin;
491   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatMultTranspose_MPIDense"
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502   PetscErrorCode ierr;
503   PetscScalar    zero = 0.0;
504 
505   PetscFunctionBegin;
506   ierr = VecSet(yy,zero);CHKERRQ(ierr);
507   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
508   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
516 {
517   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
518   PetscErrorCode ierr;
519 
520   PetscFunctionBegin;
521   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
522   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
523   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   PetscFunctionReturn(0);
526 }
527 
528 #undef __FUNCT__
529 #define __FUNCT__ "MatGetDiagonal_MPIDense"
530 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
531 {
532   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
533   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
534   PetscErrorCode ierr;
535   PetscInt       len,i,n,m = A->rmap->n,radd;
536   PetscScalar    *x,zero = 0.0;
537 
538   PetscFunctionBegin;
539   ierr = VecSet(v,zero);CHKERRQ(ierr);
540   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
541   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
542   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
543   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
544   radd = A->rmap->rstart*m;
545   for (i=0; i<len; i++) {
546     x[i] = aloc->v[radd + i*m + i];
547   }
548   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatDestroy_MPIDense"
554 PetscErrorCode MatDestroy_MPIDense(Mat mat)
555 {
556   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
557   PetscErrorCode ierr;
558 #if defined(PETSC_HAVE_PLAPACK)
559   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
560 #endif
561 
562   PetscFunctionBegin;
563 
564 #if defined(PETSC_USE_LOG)
565   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
566 #endif
567   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
568   ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
569   if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
570   if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
571 #if defined(PETSC_HAVE_PLAPACK)
572   if (lu) {
573     ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
574     ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
575     ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);
576 
577     if (lu->is_pla) {
578       ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
579       ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
580       ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
581     }
582   }
583 #endif
584 
585   ierr = PetscFree(mdn);CHKERRQ(ierr);
586   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
587   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatView_MPIDense_Binary"
597 static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
598 {
599   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
600   PetscErrorCode    ierr;
601   PetscViewerFormat format;
602   int               fd;
603   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
604   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
605   PetscScalar       *work,*v,*vv;
606   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
607   MPI_Status        status;
608 
609   PetscFunctionBegin;
610   if (mdn->size == 1) {
611     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
612   } else {
613     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
614     ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
615     ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
616 
617     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
618     if (format == PETSC_VIEWER_NATIVE) {
619 
620       if (!rank) {
621         /* store the matrix as a dense matrix */
622         header[0] = MAT_FILE_CLASSID;
623         header[1] = mat->rmap->N;
624         header[2] = N;
625         header[3] = MATRIX_BINARY_FORMAT_DENSE;
626         ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
627 
628         /* get largest work array needed for transposing array */
629         mmax = mat->rmap->n;
630         for (i=1; i<size; i++) {
631           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
632         }
633         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);
634 
635         /* write out local array, by rows */
636         m    = mat->rmap->n;
637         v    = a->v;
638         for (j=0; j<N; j++) {
639           for (i=0; i<m; i++) {
640             work[j + i*N] = *v++;
641           }
642         }
643         ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
644         /* get largest work array to receive messages from other processes, excludes process zero */
645         mmax = 0;
646         for (i=1; i<size; i++) {
647           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
648         }
649         ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
650         for(k = 1; k < size; k++) {
651           v    = vv;
652           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
653           ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
654 
655           for(j = 0; j < N; j++) {
656             for(i = 0; i < m; i++) {
657               work[j + i*N] = *v++;
658             }
659           }
660           ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
661         }
662         ierr = PetscFree(work);CHKERRQ(ierr);
663         ierr = PetscFree(vv);CHKERRQ(ierr);
664       } else {
665         ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
666       }
667     } else {
668       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
669     }
670   }
671   PetscFunctionReturn(0);
672 }
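/*
   A usage sketch for the native binary format required above (the file name is illustrative):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(((PetscObject)mat)->comm,"dense.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/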
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
676 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
677 {
678   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
679   PetscErrorCode        ierr;
680   PetscMPIInt           size = mdn->size,rank = mdn->rank;
681   const PetscViewerType vtype;
682   PetscBool             iascii,isdraw;
683   PetscViewer           sviewer;
684   PetscViewerFormat     format;
685 #if defined(PETSC_HAVE_PLAPACK)
686   Mat_Plapack           *lu;
687 #endif
688 
689   PetscFunctionBegin;
690   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
691   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
692   if (iascii) {
693     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
694     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
695     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
696       MatInfo info;
697       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
698       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
699                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701 #if defined(PETSC_HAVE_PLAPACK)
702       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
703       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
704       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
705       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
706       if (mat->factortype){
707         lu=(Mat_Plapack*)(mat->spptr);
708         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
709       }
710 #else
711       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
712 #endif
713       PetscFunctionReturn(0);
714     } else if (format == PETSC_VIEWER_ASCII_INFO) {
715       PetscFunctionReturn(0);
716     }
717   } else if (isdraw) {
718     PetscDraw  draw;
719     PetscBool  isnull;
720 
721     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
722     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
723     if (isnull) PetscFunctionReturn(0);
724   }
725 
726   if (size == 1) {
727     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
728   } else {
729     /* assemble the entire matrix onto first processor. */
730     Mat         A;
731     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
732     PetscInt    *cols;
733     PetscScalar *vals;
734 
735     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
736     if (!rank) {
737       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
738     } else {
739       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
740     }
741     /* Since this is a temporary matrix, using MATMPIDENSE instead of ((PetscObject)mat)->type_name here is probably acceptable. */
742     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
743     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
744     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
745 
746     /* Copy the matrix ... This isn't the most efficient means,
747        but it's quick for now */
748     A->insertmode = INSERT_VALUES;
749     row = mat->rmap->rstart; m = mdn->A->rmap->n;
750     for (i=0; i<m; i++) {
751       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
752       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
753       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
754       row++;
755     }
756 
757     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
758     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
759     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
760     if (!rank) {
761       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
762       /* Set the type name to MATMPIDENSE so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII() */
763       ierr = PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);CHKERRQ(ierr);
764       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
765     }
766     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
767     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
768     ierr = MatDestroy(A);CHKERRQ(ierr);
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPIDense"
775 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
776 {
777   PetscErrorCode ierr;
778   PetscBool      iascii,isbinary,isdraw,issocket;
779 
780   PetscFunctionBegin;
781 
782   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
783   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
786 
787   if (iascii || issocket || isdraw) {
788     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
789   } else if (isbinary) {
790     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
791   } else {
792     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
793   }
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatGetInfo_MPIDense"
799 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
800 {
801   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
802   Mat            mdn = mat->A;
803   PetscErrorCode ierr;
804   PetscReal      isend[5],irecv[5];
805 
806   PetscFunctionBegin;
807   info->block_size     = 1.0;
808   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
809   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
810   isend[3] = info->memory;  isend[4] = info->mallocs;
811   if (flag == MAT_LOCAL) {
812     info->nz_used      = isend[0];
813     info->nz_allocated = isend[1];
814     info->nz_unneeded  = isend[2];
815     info->memory       = isend[3];
816     info->mallocs      = isend[4];
817   } else if (flag == MAT_GLOBAL_MAX) {
818     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
819     info->nz_used      = irecv[0];
820     info->nz_allocated = irecv[1];
821     info->nz_unneeded  = irecv[2];
822     info->memory       = irecv[3];
823     info->mallocs      = irecv[4];
824   } else if (flag == MAT_GLOBAL_SUM) {
825     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
826     info->nz_used      = irecv[0];
827     info->nz_allocated = irecv[1];
828     info->nz_unneeded  = irecv[2];
829     info->memory       = irecv[3];
830     info->mallocs      = irecv[4];
831   }
832   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
833   info->fill_ratio_needed = 0;
834   info->factor_mallocs    = 0;
835   PetscFunctionReturn(0);
836 }
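/*
   A usage sketch (illustrative names): collect matrix statistics either per process or reduced
   over the communicator, as handled above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ... info.nz_used now holds the total number of stored values over all processes ...
*/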
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatSetOption_MPIDense"
840 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
841 {
842   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
843   PetscErrorCode ierr;
844 
845   PetscFunctionBegin;
846   switch (op) {
847   case MAT_NEW_NONZERO_LOCATIONS:
848   case MAT_NEW_NONZERO_LOCATION_ERR:
849   case MAT_NEW_NONZERO_ALLOCATION_ERR:
850     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
851     break;
852   case MAT_ROW_ORIENTED:
853     a->roworiented = flg;
854     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
855     break;
856   case MAT_NEW_DIAGONALS:
857   case MAT_USE_HASH_TABLE:
858     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
859     break;
860   case MAT_IGNORE_OFF_PROC_ENTRIES:
861     a->donotstash = flg;
862     break;
863   case MAT_SYMMETRIC:
864   case MAT_STRUCTURALLY_SYMMETRIC:
865   case MAT_HERMITIAN:
866   case MAT_SYMMETRY_ETERNAL:
867   case MAT_IGNORE_LOWER_TRIANGULAR:
868     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
869     break;
870   default:
871     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
872   }
873   PetscFunctionReturn(0);
874 }
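/*
   A usage sketch: when every process will only set entries in rows it owns, the stash traffic
   handled in MatSetValues_MPIDense() can be turned off entirely.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/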
875 
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatDiagonalScale_MPIDense"
879 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
880 {
881   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
882   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
883   PetscScalar    *l,*r,x,*v;
884   PetscErrorCode ierr;
885   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
886 
887   PetscFunctionBegin;
888   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
889   if (ll) {
890     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
891     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
892     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
893     for (i=0; i<m; i++) {
894       x = l[i];
895       v = mat->v + i;
896       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
897     }
898     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
899     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
900   }
901   if (rr) {
902     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
903     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
904     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
907     for (i=0; i<n; i++) {
908       x = r[i];
909       v = mat->v + i*m;
910       for (j=0; j<m; j++) { (*v++) *= x;}
911     }
912     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
913     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
914   }
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "MatNorm_MPIDense"
920 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
921 {
922   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
923   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
924   PetscErrorCode ierr;
925   PetscInt       i,j;
926   PetscReal      sum = 0.0;
927   PetscScalar    *v = mat->v;
928 
929   PetscFunctionBegin;
930   if (mdn->size == 1) {
931     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
932   } else {
933     if (type == NORM_FROBENIUS) {
934       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
935 #if defined(PETSC_USE_COMPLEX)
936         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
937 #else
938         sum += (*v)*(*v); v++;
939 #endif
940       }
941       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
942       *nrm = sqrt(*nrm);
943       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
944     } else if (type == NORM_1) {
945       PetscReal *tmp,*tmp2;
946       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
947       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
948       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
949       *nrm = 0.0;
950       v = mat->v;
951       for (j=0; j<mdn->A->cmap->n; j++) {
952         for (i=0; i<mdn->A->rmap->n; i++) {
953           tmp[j] += PetscAbsScalar(*v);  v++;
954         }
955       }
956       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
957       for (j=0; j<A->cmap->N; j++) {
958         if (tmp2[j] > *nrm) *nrm = tmp2[j];
959       }
960       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
961       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
962     } else if (type == NORM_INFINITY) { /* max row norm */
963       PetscReal ntemp;
964       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
965       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
966     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatTranspose_MPIDense"
973 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
974 {
975   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
976   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
977   Mat            B;
978   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
979   PetscErrorCode ierr;
980   PetscInt       j,i;
981   PetscScalar    *v;
982 
983   PetscFunctionBegin;
984   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
985   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
986     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
987     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
988     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
989     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
990   } else {
991     B = *matout;
992   }
993 
994   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
995   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
996   for (i=0; i<m; i++) rwork[i] = rstart + i;
997   for (j=0; j<n; j++) {
998     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
999     v   += m;
1000   }
1001   ierr = PetscFree(rwork);CHKERRQ(ierr);
1002   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1005     *matout = B;
1006   } else {
1007     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1008   }
1009   PetscFunctionReturn(0);
1010 }
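/*
   A usage sketch (illustrative names): either create a new transposed matrix or, for square
   matrices only, transpose in place as enforced above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    in-place; A must be square
*/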
1011 
1012 
1013 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1014 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatSetUpPreallocation_MPIDense"
1018 PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1019 {
1020   PetscErrorCode ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #if defined(PETSC_HAVE_PLAPACK)
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMPIDenseCopyToPlapack"
1031 PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1032 {
1033   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1034   PetscErrorCode ierr;
1035   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1036   PetscScalar    *array;
1037   PetscReal      one = 1.0;
1038 
1039   PetscFunctionBegin;
1040   /* Copy A into the PLAPACK matrix lu->A stored in F */
1041   ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
1042   ierr = PLA_API_begin();CHKERRQ(ierr);
1043   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1044   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1045   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1046   ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
1047   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1048   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1049   ierr = PLA_API_end();CHKERRQ(ierr);
1050   lu->rstart = rstart;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMPIDenseCopyFromPlapack"
1056 PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1057 {
1058   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1059   PetscErrorCode ierr;
1060   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1061   PetscScalar    *array;
1062   PetscReal      one = 1.0;
1063 
1064   PetscFunctionBegin;
1065   /* Copy the PLAPACK matrix lu->A stored in F back into A */
1066   ierr = MatZeroEntries(A);CHKERRQ(ierr);
1067   ierr = PLA_API_begin();CHKERRQ(ierr);
1068   ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
1069   ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
1070   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1071   ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
1072   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1073   ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
1074   ierr = PLA_API_end();CHKERRQ(ierr);
1075   lu->rstart = rstart;
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
1081 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1082 {
1083   PetscErrorCode ierr;
1084   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1085   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1086   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1087   PLA_Obj        alpha = NULL,beta = NULL;
1088 
1089   PetscFunctionBegin;
1090   ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
1091   ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);
1092 
1093   /*
1094   ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
1095   ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
1096   */
1097 
1098   /* do the multiply in PLA  */
1099   ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
1100   ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
1101   CHKMEMQ;
1102 
1103   ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
1104   CHKMEMQ;
1105   ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
1106   ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);
1107 
1108   /*
1109   ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
1110   */
1111   ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
1117 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1118 {
1119   PetscErrorCode ierr;
1120   PetscInt       m=A->rmap->n,n=B->cmap->n;
1121   Mat            Cmat;
1122 
1123   PetscFunctionBegin;
1124   if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1125   SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK, this is not currently supported");
1126   ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
1127   ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
1128   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
1129   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1130   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1131 
1132   *C = Cmat;
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
1138 PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1139 {
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   if (scall == MAT_INITIAL_MATRIX){
1144     ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
1145   }
1146   ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatSolve_MPIDense"
1152 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1153 {
1154   MPI_Comm       comm = ((PetscObject)A)->comm;
1155   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1156   PetscErrorCode ierr;
1157   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1158   PetscScalar    *array;
1159   PetscReal      one = 1.0;
1160   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1161   PLA_Obj        v_pla = NULL;
1162   PetscScalar    *loc_buf;
1163   Vec            loc_x;
1164 
1165   PetscFunctionBegin;
1166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1167   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1168 
1169   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1170   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1171   PLA_Obj_set_to_zero(v_pla);
1172 
1173   /* Copy b into v_pla */
1174   PLA_API_begin();
1175   PLA_Obj_API_open(v_pla);
1176   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1177   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1178   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1179   PLA_Obj_API_close(v_pla);
1180   PLA_API_end();
1181 
1182   if (A->factortype == MAT_FACTOR_LU){
1183     /* Apply the permutations to the right hand sides */
1184     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1185 
1186     /* Solve L y = b, overwriting b with y */
1187     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1188 
1189     /* Solve U x = y (=b), overwriting b with x */
1190     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1191   } else { /* MAT_FACTOR_CHOLESKY */
1192     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1193     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1194                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1195   }
1196 
1197   /* Copy PLAPACK x into Petsc vector x  */
1198   PLA_Obj_local_length(v_pla, &loc_m);
1199   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1200   PLA_Obj_local_stride(v_pla, &loc_stride);
1201   /*
1202     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1203   */
1204   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1205   if (!lu->pla_solved){
1206 
1207     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1208     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1209 
1210     /* Create index sets and a scatter context for the VecScatter */
1211     PLA_Obj_local_length(v_pla, &loc_m);
1212     PLA_Obj_local_stride(v_pla, &loc_stride);
1213     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1214 
1215     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1216     for (i=0; i<loc_m; i+=lu->nb){
1217       j = 0;
1218       while (j < lu->nb && i+j < loc_m){
1219         idx_petsc[i+j] = rstart + j; j++;
1220       }
1221       rstart += size*lu->nb;
1222     }
1223 
1224     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1225 
1226     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
1227     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
1228     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1229     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1230   }
1231   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233 
1234   /* Free data */
1235   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1236   PLA_Obj_free(&v_pla);
1237 
1238   lu->pla_solved = PETSC_TRUE;
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatLUFactorNumeric_MPIDense"
1244 PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1245 {
1246   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1247   PetscErrorCode ierr;
1248   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1249   PetscInt       info_pla=0;
1250   PetscScalar    *array,one = 1.0;
1251 
1252   PetscFunctionBegin;
1253   if (lu->mstruct == SAME_NONZERO_PATTERN){
1254     PLA_Obj_free(&lu->A);
1255     PLA_Obj_free (&lu->pivots);
1256   }
1257   /* Create PLAPACK matrix object */
1258   lu->A = NULL; lu->pivots = NULL;
1259   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1260   PLA_Obj_set_to_zero(lu->A);
1261   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1262 
1263   /* Copy A into lu->A */
1264   PLA_API_begin();
1265   PLA_Obj_API_open(lu->A);
1266   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1267   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1268   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1269   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1270   PLA_Obj_API_close(lu->A);
1271   PLA_API_end();
1272 
1273   /* Factor P A -> L U, overwriting the lower triangular portion of A with L and the upper triangular portion with U */
1274   info_pla = PLA_LU(lu->A,lu->pivots);
1275   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1276 
1277   lu->rstart         = rstart;
1278   lu->mstruct        = SAME_NONZERO_PATTERN;
1279   F->ops->solve      = MatSolve_MPIDense;
1280   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
1286 PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1287 {
1288   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1289   PetscErrorCode ierr;
1290   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1291   PetscInt       info_pla=0;
1292   PetscScalar    *array,one = 1.0;
1293 
1294   PetscFunctionBegin;
1295   if (lu->mstruct == SAME_NONZERO_PATTERN){
1296     PLA_Obj_free(&lu->A);
1297   }
1298   /* Create PLAPACK matrix object */
1299   lu->A      = NULL;
1300   lu->pivots = NULL;
1301   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1302 
1303   /* Copy A into lu->A */
1304   PLA_API_begin();
1305   PLA_Obj_API_open(lu->A);
1306   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
1307   ierr = MatGetArray(A,&array);CHKERRQ(ierr);
1308   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1309   ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
1310   PLA_Obj_API_close(lu->A);
1311   PLA_API_end();
1312 
1313   /* Compute the Cholesky factorization of A */
1314   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1315   if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1316 
1317   lu->rstart         = rstart;
1318   lu->mstruct        = SAME_NONZERO_PATTERN;
1319   F->ops->solve      = MatSolve_MPIDense;
1320   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 /* Note the Petsc perm permutation is ignored */
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
1327 PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1328 {
1329   PetscErrorCode ierr;
1330   PetscBool      issymmetric,set;
1331 
1332   PetscFunctionBegin;
1333   ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
1334   if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1335   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 /* Note the Petsc r and c permutations are ignored */
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
1342 PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1343 {
1344   PetscErrorCode ierr;
1345   PetscInt       M = A->rmap->N;
1346   Mat_Plapack    *lu;
1347 
1348   PetscFunctionBegin;
1349   lu = (Mat_Plapack*)F->spptr;
1350   ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
1351   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 EXTERN_C_BEGIN
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
1358 PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1359 {
1360   PetscFunctionBegin;
1361   *type = MATSOLVERPLAPACK;
1362   PetscFunctionReturn(0);
1363 }
1364 EXTERN_C_END
1365 
1366 EXTERN_C_BEGIN
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetFactor_mpidense_plapack"
1369 PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1370 {
1371   PetscErrorCode ierr;
1372   Mat_Plapack    *lu;
1373   PetscMPIInt    size;
1374   PetscInt       M=A->rmap->N;
1375 
1376   PetscFunctionBegin;
1377   /* Create the factorization matrix */
1378   ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
1379   ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1380   ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
1381   ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
1382   (*F)->spptr = (void*)lu;
1383 
1384   /* Set default Plapack parameters */
1385   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1386   lu->nb = M/size;
1387   if (M - lu->nb*size) lu->nb++; /* round up so that nb*size >= M (no cyclic distribution) */
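  /* For example (hypothetical sizes): M = 10 rows on size = 4 processes gives nb = 10/4 = 2,
     and since 10 - 2*4 != 0 the block size is bumped to nb = 3 so that nb*size >= M. */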
1388 
1389   /* Set runtime options */
1390   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
1391     ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
1392   PetscOptionsEnd();
1393 
1394   /* Create object distribution template */
1395   lu->templ = NULL;
1396   ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);
1397 
1398   /* Set the datatype */
1399 #if defined(PETSC_USE_COMPLEX)
1400   lu->datatype = MPI_DOUBLE_COMPLEX;
1401 #else
1402   lu->datatype = MPI_DOUBLE;
1403 #endif
1404 
1405   ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);
1406 
1407 
1408   lu->pla_solved     = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1409   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;
1410 
1411   if (ftype == MAT_FACTOR_LU) {
1412     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1413   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1414     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1415   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1416   (*F)->factortype = ftype;
1417   ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 EXTERN_C_END
1421 #endif
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetFactor_mpidense_petsc"
1425 PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1426 {
1427 #if defined(PETSC_HAVE_PLAPACK)
1428   PetscErrorCode ierr;
1429 #endif
1430 
1431   PetscFunctionBegin;
1432 #if defined(PETSC_HAVE_PLAPACK)
1433   ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
1434 #else
1435   SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1436 #endif
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatAXPY_MPIDense"
1442 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1443 {
1444   PetscErrorCode ierr;
1445   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatConjugate_MPIDense"
1454 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat)
1455 {
1456   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1457   PetscErrorCode ierr;
1458 
1459   PetscFunctionBegin;
1460   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatRealPart_MPIDense"
1466 PetscErrorCode MatRealPart_MPIDense(Mat A)
1467 {
1468   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1469   PetscErrorCode ierr;
1470 
1471   PetscFunctionBegin;
1472   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1478 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1479 {
1480   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1481   PetscErrorCode ierr;
1482 
1483   PetscFunctionBegin;
1484   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 /* -------------------------------------------------------------------*/
1489 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1490        MatGetRow_MPIDense,
1491        MatRestoreRow_MPIDense,
1492        MatMult_MPIDense,
1493 /* 4*/ MatMultAdd_MPIDense,
1494        MatMultTranspose_MPIDense,
1495        MatMultTransposeAdd_MPIDense,
1496        0,
1497        0,
1498        0,
1499 /*10*/ 0,
1500        0,
1501        0,
1502        0,
1503        MatTranspose_MPIDense,
1504 /*15*/ MatGetInfo_MPIDense,
1505        MatEqual_MPIDense,
1506        MatGetDiagonal_MPIDense,
1507        MatDiagonalScale_MPIDense,
1508        MatNorm_MPIDense,
1509 /*20*/ MatAssemblyBegin_MPIDense,
1510        MatAssemblyEnd_MPIDense,
1511        MatSetOption_MPIDense,
1512        MatZeroEntries_MPIDense,
1513 /*24*/ MatZeroRows_MPIDense,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*29*/ MatSetUpPreallocation_MPIDense,
1519        0,
1520        0,
1521        MatGetArray_MPIDense,
1522        MatRestoreArray_MPIDense,
1523 /*34*/ MatDuplicate_MPIDense,
1524        0,
1525        0,
1526        0,
1527        0,
1528 /*39*/ MatAXPY_MPIDense,
1529        MatGetSubMatrices_MPIDense,
1530        0,
1531        MatGetValues_MPIDense,
1532        0,
1533 /*44*/ 0,
1534        MatScale_MPIDense,
1535        0,
1536        0,
1537        0,
1538 /*49*/ 0,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*54*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*59*/ MatGetSubMatrix_MPIDense,
1549        MatDestroy_MPIDense,
1550        MatView_MPIDense,
1551        0,
1552        0,
1553 /*64*/ 0,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*69*/ 0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*74*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*79*/ 0,
1569        0,
1570        0,
1571        0,
1572 /*83*/ MatLoad_MPIDense,
1573        0,
1574        0,
1575        0,
1576        0,
1577        0,
1578 /*89*/
1579 #if defined(PETSC_HAVE_PLAPACK)
1580        MatMatMult_MPIDense_MPIDense,
1581        MatMatMultSymbolic_MPIDense_MPIDense,
1582        MatMatMultNumeric_MPIDense_MPIDense,
1583 #else
1584        0,
1585        0,
1586        0,
1587 #endif
1588        0,
1589        0,
1590 /*94*/ 0,
1591        0,
1592        0,
1593        0,
1594        0,
1595 /*99*/ 0,
1596        0,
1597        0,
1598        MatConjugate_MPIDense,
1599        0,
1600 /*104*/0,
1601        MatRealPart_MPIDense,
1602        MatImaginaryPart_MPIDense,
1603        0,
1604        0,
1605 /*109*/0,
1606        0,
1607        0,
1608        0,
1609        0,
1610 /*114*/0,
1611        0,
1612        0,
1613        0,
1614        0,
1615 /*119*/0,
1616        0,
1617        0,
1618        0
1619 };
1620 
1621 EXTERN_C_BEGIN
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
1624 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1625 {
1626   Mat_MPIDense   *a;
1627   PetscErrorCode ierr;
1628 
1629   PetscFunctionBegin;
1630   mat->preallocated = PETSC_TRUE;
1631   /* Note: for now, when data is provided by the caller, this assumes the user has correctly
1632    allocated the local dense storage space.  We should add error checking. */
1633 
1634   a    = (Mat_MPIDense*)mat->data;
1635   ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
1636   ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
1637   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1638   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1639   a->nvec = mat->cmap->n;
1640 
1641   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1642   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1643   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1644   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1645   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1646   PetscFunctionReturn(0);
1647 }
1648 EXTERN_C_END
1649 
1650 /*MC
1651    MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1652 
1653   Run ./configure with the option --download-plapack to install PETSc with PLAPACK support.
1654 
1655 
1656   Options Database Keys:
1657 + -mat_plapack_nprows <n> - number of rows in processor partition
1658 . -mat_plapack_npcols <n> - number of columns in processor partition
1659 . -mat_plapack_nb <n> - block size of template vector
1660 . -mat_plapack_nb_alg <n> - algorithmic block size
1661 - -mat_plapack_ckerror <n> - error checking flag
1662 
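  A minimal usage sketch (assumes ksp is a KSP whose operator is a MATMPIDENSE matrix; choosing
  PCCHOLESKY instead additionally requires MatSetOption() with MAT_SYMMETRIC on the operator):
.vb
     PC pc;
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCLU);
     PCFactorSetSolverPackage(pc,MATSOLVERPLAPACK);
.ve
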
1663    Level: intermediate
1664 
1665 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1666 
1667 M*/
1668 
1669 EXTERN_C_BEGIN
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatCreate_MPIDense"
1672 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
1673 {
1674   Mat_MPIDense   *a;
1675   PetscErrorCode ierr;
1676 
1677   PetscFunctionBegin;
1678   ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
1679   mat->data         = (void*)a;
1680   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1681 
1682   mat->insertmode = NOT_SET_VALUES;
1683   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
1684   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);
1685 
1686   /* build cache for off-process entries formed during assembly */
1687   a->donotstash = PETSC_FALSE;
1688   ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);
1689 
1690   /* stuff used for matrix vector multiply */
1691   a->lvec        = 0;
1692   a->Mvctx       = 0;
1693   a->roworiented = PETSC_TRUE;
1694 
1695   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1696                                      "MatGetDiagonalBlock_MPIDense",
1697                                      MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
1698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1699                                      "MatMPIDenseSetPreallocation_MPIDense",
1700                                      MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1701   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1702                                      "MatMatMult_MPIAIJ_MPIDense",
1703                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1705                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1706                                       MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1708                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1709                                       MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1710   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1711                                      "MatGetFactor_mpidense_petsc",
1712                                       MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
1713 #if defined(PETSC_HAVE_PLAPACK)
1714   ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1715                                      "MatGetFactor_mpidense_plapack",
1716                                       MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
1717   ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
1718 #endif
1719   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1720 
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 /*MC
1726    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1727 
1728    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1729    and MATMPIDENSE otherwise.
1730 
1731    Options Database Keys:
1732 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1733 
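   A minimal usage sketch (assumes A was obtained from MatCreate()); on a one-process communicator
   the call below resolves to MATSEQDENSE, otherwise to MATMPIDENSE:
.vb
      MatSetType(A,MATDENSE);
.ve
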
1734   Level: beginner
1735 
1736 
1737 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1738 M*/
1739 
1740 EXTERN_C_BEGIN
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatCreate_Dense"
1743 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1744 {
1745   PetscErrorCode ierr;
1746   PetscMPIInt    size;
1747 
1748   PetscFunctionBegin;
1749   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1750   if (size == 1) {
1751     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1752   } else {
1753     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1754   }
1755   PetscFunctionReturn(0);
1756 }
1757 EXTERN_C_END
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1761 /*@C
1762    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1763 
1764    Not collective
1765 
1766    Input Parameters:
1767 +  mat - the matrix
1768 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1769    to control all matrix memory allocation.
1770 
1771    Notes:
1772    The dense format is fully compatible with standard Fortran 77
1773    storage by columns.
1774 
1775    The data input variable is intended primarily for Fortran programmers
1776    who wish to allocate their own matrix memory space.  Most users should
1777    set data=PETSC_NULL.
1778 
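   Example usage (a minimal sketch, assuming a 100 by 100 matrix whose storage PETSc manages):
.vb
      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,PETSC_NULL);
.ve
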
1779    Level: intermediate
1780 
1781 .keywords: matrix,dense, parallel
1782 
1783 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1784 @*/
1785 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1786 {
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatCreateMPIDense"
1796 /*@C
1797    MatCreateMPIDense - Creates a parallel matrix in dense format.
1798 
1799    Collective on MPI_Comm
1800 
1801    Input Parameters:
1802 +  comm - MPI communicator
1803 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1804 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1805 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1806 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1807 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1808    to control all matrix memory allocation.
1809 
1810    Output Parameter:
1811 .  A - the matrix
1812 
1813    Notes:
1814    The dense format is fully compatible with standard Fortran 77
1815    storage by columns.
1816 
1817    The data input variable is intended primarily for Fortran programmers
1818    who wish to allocate their own matrix memory space.  Most users should
1819    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1820 
1821    The user MUST specify either the local or global matrix dimensions
1822    (possibly both).
1823 
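   Example usage (a minimal sketch; the global size 100 by 100 is arbitrary, PETSc chooses the local
   sizes and manages the storage):
.vb
      Mat A;
      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);
.ve
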
1824    Level: intermediate
1825 
1826 .keywords: matrix,dense, parallel
1827 
1828 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1829 @*/
1830 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1831 {
1832   PetscErrorCode ierr;
1833   PetscMPIInt    size;
1834 
1835   PetscFunctionBegin;
1836   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1837   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1838   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1839   if (size > 1) {
1840     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1841     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1842   } else {
1843     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1844     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1845   }
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatDuplicate_MPIDense"
1851 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1852 {
1853   Mat            mat;
1854   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1855   PetscErrorCode ierr;
1856 
1857   PetscFunctionBegin;
1858   *newmat       = 0;
1859   ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
1860   ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1861   ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1862   a                 = (Mat_MPIDense*)mat->data;
1863   ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1864 
1865   mat->factortype   = A->factortype;
1866   mat->assembled    = PETSC_TRUE;
1867   mat->preallocated = PETSC_TRUE;
1868 
1869   a->size           = oldmat->size;
1870   a->rank           = oldmat->rank;
1871   mat->insertmode   = NOT_SET_VALUES;
1872   a->nvec           = oldmat->nvec;
1873   a->donotstash     = oldmat->donotstash;
1874 
1875   ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
1876   ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);
1877 
1878   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1879   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1880   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1881 
1882   *newmat = mat;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1888 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1889 {
1890   PetscErrorCode ierr;
1891   PetscMPIInt    rank,size;
1892   PetscInt       *rowners,i,m,nz,j;
1893   PetscScalar    *array,*vals,*vals_ptr;
1894   MPI_Status     status;
1895 
1896   PetscFunctionBegin;
1897   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1898   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1899 
1900   /* determine ownership of all rows */
1901   if (newmat->rmap->n < 0) m          = M/size + ((M % size) > rank);
1902   else m = newmat->rmap->n;
1903   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1904   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1905   rowners[0] = 0;
1906   for (i=2; i<=size; i++) {
1907     rowners[i] += rowners[i-1];
1908   }
1909 
1910   if (!sizesset) {
1911     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1912   }
1913   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1914   ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);
1915 
1916   if (!rank) {
1917     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1918 
1919     /* read in my part of the matrix numerical values  */
1920     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1921 
1922     /* insert into the matrix row by row; the array is stored by columns, which is why we cannot read directly into it */
1923     vals_ptr = vals;
1924     for (i=0; i<m; i++) {
1925       for (j=0; j<N; j++) {
1926         array[i + j*m] = *vals_ptr++;
1927       }
1928     }
1929 
1930     /* read in the other processes' parts and ship them out */
1931     for (i=1; i<size; i++) {
1932       nz   = (rowners[i+1] - rowners[i])*N;
1933       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1934       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1935     }
1936   } else {
1937     /* receive numeric values */
1938     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1939 
1940     /* receive message of values */
1941     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);
1942 
1943     /* insert into the matrix row by row; the array is stored by columns, which is why we cannot read directly into it */
1944     vals_ptr = vals;
1945     for (i=0; i<m; i++) {
1946       for (j=0; j<N; j++) {
1947         array[i + j*m] = *vals_ptr++;
1948       }
1949     }
1950   }
1951   ierr = PetscFree(rowners);CHKERRQ(ierr);
1952   ierr = PetscFree(vals);CHKERRQ(ierr);
1953   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1954   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatLoad_MPIDense"
1960 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1961 {
1962   PetscScalar    *vals,*svals;
1963   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1964   MPI_Status     status;
1965   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1966   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1967   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1968   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1969   int            fd;
1970   PetscErrorCode ierr;
1971 
1972   PetscFunctionBegin;
1973   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1974   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1975   if (!rank) {
1976     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1977     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1978     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1979   }
1980   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1981 
1982   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1983   M = header[1]; N = header[2]; nz = header[3];
1984 
1985   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1986   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1987   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1988 
1989   /* If global sizes are set, check if they are consistent with that given in the file */
1990   if (sizesset) {
1991     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1992   }
1993   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1994   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1995 
1996   /*
1997        Handle case where matrix is stored on disk as a dense matrix
1998   */
1999   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
2000     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
2001     PetscFunctionReturn(0);
2002   }
2003 
2004   /* determine ownership of all rows */
2005   if (newmat->rmap->n < 0) m          = PetscMPIIntCast(M/size + ((M % size) > rank));
2006   else m = PetscMPIIntCast(newmat->rmap->n);
2007   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2008   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2009   rowners[0] = 0;
2010   for (i=2; i<=size; i++) {
2011     rowners[i] += rowners[i-1];
2012   }
2013   rstart = rowners[rank];
2014   rend   = rowners[rank+1];
2015 
2016   /* distribute row lengths to all processors */
2017   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
2018   if (!rank) {
2019     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2020     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2021     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2022     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2023     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2024     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2025   } else {
2026     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2027   }
2028 
2029   if (!rank) {
2030     /* calculate the number of nonzeros on each processor */
2031     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2032     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2033     for (i=0; i<size; i++) {
2034       for (j=rowners[i]; j< rowners[i+1]; j++) {
2035         procsnz[i] += rowlengths[j];
2036       }
2037     }
2038     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2039 
2040     /* determine max buffer needed and allocate it */
2041     maxnz = 0;
2042     for (i=0; i<size; i++) {
2043       maxnz = PetscMax(maxnz,procsnz[i]);
2044     }
2045     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2046 
2047     /* read in my part of the matrix column indices  */
2048     nz = procsnz[0];
2049     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2050     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2051 
2052     /* read in everyone else's part and ship it off */
2053     for (i=1; i<size; i++) {
2054       nz   = procsnz[i];
2055       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2056       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2057     }
2058     ierr = PetscFree(cols);CHKERRQ(ierr);
2059   } else {
2060     /* determine buffer space needed for message */
2061     nz = 0;
2062     for (i=0; i<m; i++) {
2063       nz += ourlens[i];
2064     }
2065     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2066 
2067     /* receive message of column indices*/
2068     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2069     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2070     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2071   }
2072 
2073   /* loop over local rows, determining number of off diagonal entries */
2074   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2075   jj = 0;
2076   for (i=0; i<m; i++) {
2077     for (j=0; j<ourlens[i]; j++) {
2078       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2079       jj++;
2080     }
2081   }
2082 
2083   /* create our matrix */
2084   for (i=0; i<m; i++) {
2085     ourlens[i] -= offlens[i];
2086   }
2087 
2088   if (!sizesset) {
2089     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2090   }
2091   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2092   for (i=0; i<m; i++) {
2093     ourlens[i] += offlens[i];
2094   }
2095 
2096   if (!rank) {
2097     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2098 
2099     /* read in my part of the matrix numerical values  */
2100     nz = procsnz[0];
2101     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2102 
2103     /* insert into matrix */
2104     jj      = rstart;
2105     smycols = mycols;
2106     svals   = vals;
2107     for (i=0; i<m; i++) {
2108       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2109       smycols += ourlens[i];
2110       svals   += ourlens[i];
2111       jj++;
2112     }
2113 
2114     /* read in the other processes' parts and ship them out */
2115     for (i=1; i<size; i++) {
2116       nz   = procsnz[i];
2117       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2118       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2119     }
2120     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2121   } else {
2122     /* receive numeric values */
2123     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2124 
2125     /* receive message of values */
2126     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2127     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2128     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2129 
2130     /* insert into matrix */
2131     jj      = rstart;
2132     smycols = mycols;
2133     svals   = vals;
2134     for (i=0; i<m; i++) {
2135       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2136       smycols += ourlens[i];
2137       svals   += ourlens[i];
2138       jj++;
2139     }
2140   }
2141   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2142   ierr = PetscFree(vals);CHKERRQ(ierr);
2143   ierr = PetscFree(mycols);CHKERRQ(ierr);
2144   ierr = PetscFree(rowners);CHKERRQ(ierr);
2145 
2146   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatEqual_MPIDense"
2153 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2154 {
2155   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2156   Mat            a,b;
2157   PetscBool      flg;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   a = matA->A;
2162   b = matB->A;
2163   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2164   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #if defined(PETSC_HAVE_PLAPACK)
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2172 /*@C
2173   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2174   Level: developer
2175 
2176 .keywords: Petsc, destroy, package, PLAPACK
2177 .seealso: PetscFinalize()
2178 @*/
2179 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKFinalizePackage(void)
2180 {
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   ierr = PLA_Finalize();CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 #undef __FUNCT__
2189 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2190 /*@C
2191   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2192   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2193 
2194   Input Parameter:
2195 .   comm - the communicator the matrix lives on
2196 
2197   Level: developer
2198 
2199    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2200   same communicator, because there is global state that is initialized once and shared by all matrices. In addition,
2201   if PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, those
2202   subcommunicators cannot overlap.
2203 
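  A typical invocation sketch (the program name and mesh dimensions below are only illustrative); the
  2D processor mesh read by this routine can be selected on the command line:
.vb
     mpiexec -n 4 ./myprog -mat_plapack_nprows 2 -mat_plapack_npcols 2
.ve
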
2204 .keywords: Petsc, initialize, package, PLAPACK
2205 .seealso: PetscSysInitializePackage(), PetscInitialize()
2206 @*/
2207 PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
2208 {
2209   PetscMPIInt    size;
2210   PetscErrorCode ierr;
2211 
2212   PetscFunctionBegin;
2213   if (!PLA_Initialized(PETSC_NULL)) {
2214 
2215     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2216     Plapack_nprows = 1;
2217     Plapack_npcols = size;
2218 
2219     ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
2220       ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
2221       ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
2222 #if defined(PETSC_USE_DEBUG)
2223       Plapack_ierror = 3;
2224 #else
2225       Plapack_ierror = 0;
2226 #endif
2227       ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
2228       if (Plapack_ierror){
2229 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
2230       } else {
2231 	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
2232       }
2233 
2234       Plapack_nb_alg = 0;
2235       ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
2236       if (Plapack_nb_alg) {
2237         ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
2238       }
2239     PetscOptionsEnd();
2240 
2241     ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
2242     ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
2243     ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
2244   }
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 #endif
2249