xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ebdbe8816e0a29f4c004cccc3e6a5c7a4e425788)
1 #define PETSCMAT_DLL
2 
3 /*
4    Basic functions for basic parallel dense matrices.
5 */
6 
7 
8 #include "../src/mat/impls/dense/mpi/mpidense.h"    /*I   "petscmat.h"  I*/
9 #if defined(PETSC_HAVE_PLAPACK)
10 static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11 static MPI_Comm Plapack_comm_2d;
12 EXTERN_C_BEGIN
13 #include "PLA.h"
14 EXTERN_C_END
15 
typedef struct {
  PLA_Obj        A,pivots;        /* PLAPACK matrix object and pivot vector of the factorization */
  PLA_Template   templ;           /* PLAPACK distribution template (process mesh / block layout) */
  MPI_Datatype   datatype;        /* MPI datatype matching PetscScalar, used by PLAPACK */
  PetscInt       nb,rstart;       /* distribution block size; first local row -- TODO confirm */
  VecScatter     ctx;             /* scatter between PETSc and PLAPACK vector orderings */
  IS             is_pla,is_petsc; /* index sets defining the PETSc <-> PLAPACK permutation */
  PetscBool      pla_solved;      /* presumably set once a solve has used this factorization -- verify */
  MatStructure   mstruct;         /* nonzero-structure flag recorded at factorization time */
} Mat_Plapack;
26 #endif
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatDenseGetLocalMatrix"
30 /*@
31 
32       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
33               matrix that represents the operator. For sequential matrices it returns itself.
34 
35     Input Parameter:
36 .      A - the Seq or MPI dense matrix
37 
38     Output Parameter:
39 .      B - the inner matrix
40 
41     Level: intermediate
42 
43 @*/
44 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45 {
46   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
47   PetscErrorCode ierr;
48   PetscBool      flg;
49 
50   PetscFunctionBegin;
51   ierr = PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
52   if (flg) {
53     *B = mat->A;
54   } else {
55     *B = A;
56   }
57   PetscFunctionReturn(0);
58 }
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "MatGetRow_MPIDense"
62 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63 {
64   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
65   PetscErrorCode ierr;
66   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
67 
68   PetscFunctionBegin;
69   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
70   lrow = row - rstart;
71   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatRestoreRow_MPIDense"
77 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78 {
79   PetscErrorCode ierr;
80 
81   PetscFunctionBegin;
82   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
83   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
84   PetscFunctionReturn(0);
85 }
86 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIDense"
/* Returns in *B a sequential dense matrix for this process's diagonal block,
   i.e. rows rstart..rstart+m-1 and the matching columns of the (square) matrix.
   The new matrix is built directly on a pointer into the local array
   (array+m*rstart skips the first rstart columns; assumes column-major storage
   with leading dimension m -- TODO confirm lda == m), so no values are copied.
   *iscopy is set because the caller receives a new Mat it must destroy. */
PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIDense(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *B)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  /* The reuse aspect is not implemented efficiently */
  if (reuse) { ierr = MatDestroy(*B);CHKERRQ(ierr);}

  ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
  ierr = MatGetArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,m,m);CHKERRQ(ierr);
  ierr = MatSetType(*B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
  /* hand the new matrix the storage of the diagonal block in place (no copy) */
  ierr = MatSeqDenseSetPreallocation(*B,array+m*rstart);CHKERRQ(ierr);
  ierr = MatRestoreArray(mdn->A,&array);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *iscopy = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
118 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIDense"
/* Inserts or adds a logically dense block of values.  Rows owned by this process
   go straight into the local sequential matrix; off-process rows are stashed and
   communicated during MatAssemblyBegin/End (unless donotstash is set).  Negative
   row or column indices are silently skipped. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;    /* convert to local row number */
      if (roworiented) {
        /* v stored by rows: row i of the block starts at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v stored by columns: entry (i,j) of the block is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process row: stash for communication at assembly time */
      if (!A->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "MatGetValues_MPIDense"
159 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160 {
161   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
164 
165   PetscFunctionBegin;
166   for (i=0; i<m; i++) {
167     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169     if (idxm[i] >= rstart && idxm[i] < rend) {
170       row = idxm[i] - rstart;
171       for (j=0; j<n; j++) {
172         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
174         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
175       }
176     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
177   }
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatGetArray_MPIDense"
183 PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
184 {
185   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
186   PetscErrorCode ierr;
187 
188   PetscFunctionBegin;
189   ierr = MatGetArray(a->A,array);CHKERRQ(ierr);
190   PetscFunctionReturn(0);
191 }
192 
193 #undef __FUNCT__
194 #define __FUNCT__ "MatGetSubMatrix_MPIDense"
195 static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
196 {
197   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
198   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
199   PetscErrorCode ierr;
200   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
201   const PetscInt *irow,*icol;
202   PetscScalar    *av,*bv,*v = lmat->v;
203   Mat            newmat;
204   IS             iscol_local;
205 
206   PetscFunctionBegin;
207   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
208   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
209   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
210   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
211   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
212   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
213 
214   /* No parallel redistribution currently supported! Should really check each index set
215      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
216      original matrix! */
217 
218   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
219   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
220 
221   /* Check submatrix call */
222   if (scall == MAT_REUSE_MATRIX) {
223     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
224     /* Really need to test rows and column sizes! */
225     newmat = *B;
226   } else {
227     /* Create and fill new matrix */
228     ierr = MatCreate(((PetscObject)A)->comm,&newmat);CHKERRQ(ierr);
229     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
230     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
231     ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
232   }
233 
234   /* Now extract the data pointers and do the copy, column at a time */
235   newmatd = (Mat_MPIDense*)newmat->data;
236   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
237 
238   for (i=0; i<Ncols; i++) {
239     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
240     for (j=0; j<nrows; j++) {
241       *bv++ = av[irow[j] - rstart];
242     }
243   }
244 
245   /* Assemble the matrices so that the correct flags are set */
246   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248 
249   /* Free work space */
250   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
251   ierr = ISRestoreIndices(iscol,&icol);CHKERRQ(ierr);
252   *B = newmat;
253   PetscFunctionReturn(0);
254 }
255 
#undef __FUNCT__
#define __FUNCT__ "MatRestoreArray_MPIDense"
/* Companion to MatGetArray_MPIDense(); nothing to do since the array is handed out
   in place rather than copied. */
PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
263 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIDense"
/* Starts assembly: checks that all processes use a consistent InsertMode and
   launches the communication of stashed off-process values. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE; the bitwise OR
     of all modes equals ADD_VALUES|INSERT_VALUES exactly when they are mixed */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
285 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIDense"
/* Completes assembly: drains the stash messages from other processes and inserts
   their values, assembles the local block, and on the first final assembly builds
   the scatter used by the matrix-vector products. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode  ierr;
  PetscInt        i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt     n;
  PetscScalar     *val;
  InsertMode      addv=mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;    /* no more incoming messages */

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
      i = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* the MatMult scatter is created once, at the first final assembly */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatZeroEntries_MPIDense"
326 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
327 {
328   PetscErrorCode ierr;
329   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
330 
331   PetscFunctionBegin;
332   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 /* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
338    This is a BUG. The only way to fix it seems to be to access
339    mdn->A and mdn->B directly and not through the MatZeroRows()
340    routine.
341 */
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIDense"
/* Zeros the global rows listed in rows[] and places diag on their diagonal.  The
   row indices may belong to any process, so each index is first routed to its
   owning process with point-to-point messages; every process then zeros its own
   rows via the sequential MatZeroRows() on the local block.  If both x and b are
   provided, b[row] is set to diag*x[row] for each zeroed local row. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] counts rows destined for process j; nprocs[2*j+1] flags that a
     message must go to process j; owner[i] records the owner of rows[i] */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* recompute starts, which the packing loop above advanced */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]; /* first global row owned by this process */

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs;
  slen   = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen += n;          /* slen accumulates the total number of local rows to zero */
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base; /* convert to local row numbers */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
468 
469 #undef __FUNCT__
470 #define __FUNCT__ "MatMult_MPIDense"
471 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
472 {
473   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
474   PetscErrorCode ierr;
475 
476   PetscFunctionBegin;
477   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
478   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
479   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatMultAdd_MPIDense"
485 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
488   PetscErrorCode ierr;
489 
490   PetscFunctionBegin;
491   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
492   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
493   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatMultTranspose_MPIDense"
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502   PetscErrorCode ierr;
503   PetscScalar    zero = 0.0;
504 
505   PetscFunctionBegin;
506   ierr = VecSet(yy,zero);CHKERRQ(ierr);
507   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
508   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
509   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
510   PetscFunctionReturn(0);
511 }
512 
513 #undef __FUNCT__
514 #define __FUNCT__ "MatMultTransposeAdd_MPIDense"
515 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
516 {
517   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
518   PetscErrorCode ierr;
519 
520   PetscFunctionBegin;
521   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
522   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
523   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
524   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
525   PetscFunctionReturn(0);
526 }
527 
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIDense"
/* Extracts the diagonal of the matrix into the distributed vector v.  Local
   diagonal entry i is global entry (rstart+i,rstart+i), found in the local
   column-major array at rstart*m + i*m + i.
   NOTE(review): this indexing assumes the local block's leading dimension equals
   m = A->rmap->n -- confirm lda == m always holds here. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m; /* offset of column rstart in the local array */
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
551 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIDense"
/* Destroys the parallel dense matrix: releases the stash, the local block, the
   scatter machinery and (when PLAPACK was used) the factorization objects, then
   detaches the dynamically composed functions installed at creation time. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
#if defined(PETSC_HAVE_PLAPACK)
  Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
#endif

  PetscFunctionBegin;

#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(mdn->A);CHKERRQ(ierr);
  if (mdn->lvec)   {ierr = VecDestroy(mdn->lvec);CHKERRQ(ierr);}
  if (mdn->Mvctx)  {ierr = VecScatterDestroy(mdn->Mvctx);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_PLAPACK)
  if (lu) {
    ierr = PLA_Obj_free(&lu->A);CHKERRQ(ierr);
    ierr = PLA_Obj_free (&lu->pivots);CHKERRQ(ierr);
    ierr = PLA_Temp_free(&lu->templ);CHKERRQ(ierr);

    if (lu->is_pla) {
      ierr = ISDestroy(lu->is_pla);CHKERRQ(ierr);
      ierr = ISDestroy(lu->is_petsc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->ctx);CHKERRQ(ierr);
    }
    /* NOTE(review): lu itself (mat->spptr) is not PetscFree'd here -- confirm it
       is released elsewhere or this is a small leak */
  }
#endif

  ierr = PetscFree(mdn);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
594 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIDense_Binary"
/* Writes the matrix to a binary viewer.  In parallel only the PETSC_VIEWER_NATIVE
   (dense) format is supported: process 0 writes the header and its own rows, then
   receives each remaining process's local block in turn and writes it, transposing
   from the column-major in-memory layout to the row-major order used in the file. */
static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscViewerFormat format;
  int               fd;
  PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
  PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
  PetscScalar       *work,*v,*vv;
  Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
  MPI_Status        status;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* single process: defer entirely to the sequential viewer */
    ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);

    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_NATIVE) {

      if (!rank) {
        /* store the matrix as a dense matrix */
        header[0] = MAT_FILE_CLASSID;
        header[1] = mat->rmap->N;
        header[2] = N;
        header[3] = MATRIX_BINARY_FORMAT_DENSE;
        ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);

        /* get largest work array needed for transposing array */
        mmax = mat->rmap->n;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&work);CHKERRQ(ierr);

        /* write out local array, by rows */
        m    = mat->rmap->n;
        v    = a->v;
        /* transpose: v is column-major, the file wants rows contiguous */
        for (j=0; j<N; j++) {
          for (i=0; i<m; i++) {
            work[j + i*N] = *v++;
          }
        }
        ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        /* get largest work array to receive messages from other processes, excludes process zero */
        mmax = 0;
        for (i=1; i<size; i++) {
          mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
        }
        ierr = PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);CHKERRQ(ierr);
        /* receive each process's block in rank order so rows land in file order */
        for(k = 1; k < size; k++) {
          v    = vv;
          m    = mat->rmap->range[k+1] - mat->rmap->range[k];
          ierr = MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);

          for(j = 0; j < N; j++) {
            for(i = 0; i < m; i++) {
              work[j + i*N] = *v++;
            }
          }
          ierr = PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
        }
        ierr = PetscFree(work);CHKERRQ(ierr);
        ierr = PetscFree(vv);CHKERRQ(ierr);
      } else {
        /* non-root processes just ship their whole local block to process 0 */
        ierr = MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
    }
  }
  PetscFunctionReturn(0);
}
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatView_MPIDense_ASCIIorDraworSocket"
676 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
677 {
678   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
679   PetscErrorCode        ierr;
680   PetscMPIInt           size = mdn->size,rank = mdn->rank;
681   const PetscViewerType vtype;
682   PetscBool             iascii,isdraw;
683   PetscViewer           sviewer;
684   PetscViewerFormat     format;
685 #if defined(PETSC_HAVE_PLAPACK)
686   Mat_Plapack           *lu;
687 #endif
688 
689   PetscFunctionBegin;
690   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
691   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
692   if (iascii) {
693     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
694     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
695     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
696       MatInfo info;
697       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
698       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
699                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701 #if defined(PETSC_HAVE_PLAPACK)
702       ierr = PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");CHKERRQ(ierr);
703       ierr = PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);CHKERRQ(ierr);
704       ierr = PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);CHKERRQ(ierr);
705       ierr = PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);CHKERRQ(ierr);
706       if (mat->factortype){
707         lu=(Mat_Plapack*)(mat->spptr);
708         ierr = PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);CHKERRQ(ierr);
709       }
710 #else
711       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
712 #endif
713       PetscFunctionReturn(0);
714     } else if (format == PETSC_VIEWER_ASCII_INFO) {
715       PetscFunctionReturn(0);
716     }
717   } else if (isdraw) {
718     PetscDraw  draw;
719     PetscBool  isnull;
720 
721     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
722     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
723     if (isnull) PetscFunctionReturn(0);
724   }
725 
726   if (size == 1) {
727     ierr = MatView(mdn->A,viewer);CHKERRQ(ierr);
728   } else {
729     /* assemble the entire matrix onto first processor. */
730     Mat         A;
731     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
732     PetscInt    *cols;
733     PetscScalar *vals;
734 
735     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
736     if (!rank) {
737       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
738     } else {
739       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
740     }
741     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
742     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
743     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);
744     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
745 
746     /* Copy the matrix ... This isn't the most efficient means,
747        but it's quick for now */
748     A->insertmode = INSERT_VALUES;
749     row = mat->rmap->rstart; m = mdn->A->rmap->n;
750     for (i=0; i<m; i++) {
751       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
752       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
753       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
754       row++;
755     }
756 
757     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
758     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
759     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
760     if (!rank) {
761       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
762       /* Set the type name to MATMPIDense so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqDense_ASCII()*/
763       PetscStrcpy(((PetscObject)((Mat_MPIDense*)(A->data))->A)->type_name,MATMPIDENSE);
764       ierr = MatView(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
765     }
766     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
767     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
768     ierr = MatDestroy(A);CHKERRQ(ierr);
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPIDense"
775 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
776 {
777   PetscErrorCode ierr;
778   PetscBool      iascii,isbinary,isdraw,issocket;
779 
780   PetscFunctionBegin;
781 
782   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
783   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
786 
787   if (iascii || issocket || isdraw) {
788     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
789   } else if (isbinary) {
790     ierr = MatView_MPIDense_Binary(mat,viewer);CHKERRQ(ierr);
791   } else {
792     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
793   }
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatGetInfo_MPIDense"
799 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
800 {
801   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
802   Mat            mdn = mat->A;
803   PetscErrorCode ierr;
804   PetscReal      isend[5],irecv[5];
805 
806   PetscFunctionBegin;
807   info->block_size     = 1.0;
808   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
809   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
810   isend[3] = info->memory;  isend[4] = info->mallocs;
811   if (flag == MAT_LOCAL) {
812     info->nz_used      = isend[0];
813     info->nz_allocated = isend[1];
814     info->nz_unneeded  = isend[2];
815     info->memory       = isend[3];
816     info->mallocs      = isend[4];
817   } else if (flag == MAT_GLOBAL_MAX) {
818     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
819     info->nz_used      = irecv[0];
820     info->nz_allocated = irecv[1];
821     info->nz_unneeded  = irecv[2];
822     info->memory       = irecv[3];
823     info->mallocs      = irecv[4];
824   } else if (flag == MAT_GLOBAL_SUM) {
825     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
826     info->nz_used      = irecv[0];
827     info->nz_allocated = irecv[1];
828     info->nz_unneeded  = irecv[2];
829     info->memory       = irecv[3];
830     info->mallocs      = irecv[4];
831   }
832   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
833   info->fill_ratio_needed = 0;
834   info->factor_mallocs    = 0;
835   PetscFunctionReturn(0);
836 }
837 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIDense"
/*
   MatSetOption_MPIDense - Sets an option on a parallel dense matrix.

   Storage-related options are forwarded to the inner sequential matrix
   a->A; purely parallel options are recorded on the Mat_MPIDense context;
   options with no meaning for dense storage are logged and ignored.
*/
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool  flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the local sequential matrix */
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* affects both the parallel stash logic and the local matrix */
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_USE_HASH_TABLE:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues silently drops entries owned by other ranks */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
875 
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatDiagonalScale_MPIDense"
879 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
880 {
881   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
882   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
883   PetscScalar    *l,*r,x,*v;
884   PetscErrorCode ierr;
885   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
886 
887   PetscFunctionBegin;
888   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
889   if (ll) {
890     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
891     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
892     ierr = VecGetArray(ll,&l);CHKERRQ(ierr);
893     for (i=0; i<m; i++) {
894       x = l[i];
895       v = mat->v + i;
896       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
897     }
898     ierr = VecRestoreArray(ll,&l);CHKERRQ(ierr);
899     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
900   }
901   if (rr) {
902     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
903     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
904     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
907     for (i=0; i<n; i++) {
908       x = r[i];
909       v = mat->v + i*m;
910       for (j=0; j<m; j++) { (*v++) *= x;}
911     }
912     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
913     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
914   }
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "MatNorm_MPIDense"
920 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
921 {
922   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
923   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
924   PetscErrorCode ierr;
925   PetscInt       i,j;
926   PetscReal      sum = 0.0;
927   PetscScalar    *v = mat->v;
928 
929   PetscFunctionBegin;
930   if (mdn->size == 1) {
931     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
932   } else {
933     if (type == NORM_FROBENIUS) {
934       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
935 #if defined(PETSC_USE_COMPLEX)
936         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
937 #else
938         sum += (*v)*(*v); v++;
939 #endif
940       }
941       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
942       *nrm = sqrt(*nrm);
943       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
944     } else if (type == NORM_1) {
945       PetscReal *tmp,*tmp2;
946       ierr = PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
947       ierr = PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
948       ierr = PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
949       *nrm = 0.0;
950       v = mat->v;
951       for (j=0; j<mdn->A->cmap->n; j++) {
952         for (i=0; i<mdn->A->rmap->n; i++) {
953           tmp[j] += PetscAbsScalar(*v);  v++;
954         }
955       }
956       ierr = MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
957       for (j=0; j<A->cmap->N; j++) {
958         if (tmp2[j] > *nrm) *nrm = tmp2[j];
959       }
960       ierr = PetscFree2(tmp,tmp);CHKERRQ(ierr);
961       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
962     } else if (type == NORM_INFINITY) { /* max row norm */
963       PetscReal ntemp;
964       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
965       ierr = MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
966     } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No support for two norm");
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatTranspose_MPIDense"
973 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
974 {
975   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
976   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
977   Mat            B;
978   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
979   PetscErrorCode ierr;
980   PetscInt       j,i;
981   PetscScalar    *v;
982 
983   PetscFunctionBegin;
984   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports square matrix only in-place");
985   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
986     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
987     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
988     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
989     ierr = MatMPIDenseSetPreallocation(B,PETSC_NULL);CHKERRQ(ierr);
990   } else {
991     B = *matout;
992   }
993 
994   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
995   ierr = PetscMalloc(m*sizeof(PetscInt),&rwork);CHKERRQ(ierr);
996   for (i=0; i<m; i++) rwork[i] = rstart + i;
997   for (j=0; j<n; j++) {
998     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
999     v   += m;
1000   }
1001   ierr = PetscFree(rwork);CHKERRQ(ierr);
1002   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1005     *matout = B;
1006   } else {
1007     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1014 extern PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
1015 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIDense"
/* Default preallocation hook: have PETSc allocate and zero the local dense
   storage itself (0 means no user-supplied data pointer). */
PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1026 
1027 #if defined(PETSC_HAVE_PLAPACK)
1028 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyToPlapack"
/*
   MatMPIDenseCopyToPlapack - Copies the entries of the PETSc matrix A into
   the PLAPACK matrix object held in F->spptr (a Mat_Plapack).

   The PLAPACK object is zeroed first, then the local m x M strip of A
   (rows rstart..rstart+m-1, all M columns) is added into the global
   PLAPACK object through the PLAPACK application interface.  The local
   row start is cached in lu->rstart for later use (e.g. by MatSolve).
*/
PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Copy A into F->lu->A */
  ierr = PLA_Obj_set_to_zero(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  /* axpy with coefficient 1.0 into the zeroed object == straight copy */
  ierr = PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}
1053 
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseCopyFromPlapack"
/*
   MatMPIDenseCopyFromPlapack - Copies the PLAPACK matrix object held in
   F->spptr back into the PETSc matrix A (the reverse of
   MatMPIDenseCopyToPlapack).
*/
PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
  PetscScalar    *array;
  PetscReal      one = 1.0;

  PetscFunctionBegin;
  /* Zero A, then add the PLAPACK global object (scaled by 1.0) into it */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);
  ierr = PLA_API_begin();CHKERRQ(ierr);
  ierr = PLA_Obj_API_open(lu->A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);CHKERRQ(ierr);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  ierr = PLA_Obj_API_close(lu->A);CHKERRQ(ierr);
  ierr = PLA_API_end();CHKERRQ(ierr);
  lu->rstart = rstart;
  PetscFunctionReturn(0);
}
1078 
#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIDense"
/*
   MatMatMultNumeric_MPIDense_MPIDense - Numeric stage of C = A*B using
   PLAPACK's parallel GEMM.

   A and B are copied into their PLAPACK mirror objects (each matrix's own
   spptr), PLA_Gemm computes the product into C's PLAPACK object, and the
   result is copied back into the PETSc matrix C.
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
  Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
  Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
  PLA_Obj        alpha = NULL,beta = NULL;

  PetscFunctionBegin;
  /* each matrix acts as its own "factor" holder here: copy into A->spptr->A etc. */
  ierr = MatMPIDenseCopyToPlapack(A,A);CHKERRQ(ierr);
  ierr = MatMPIDenseCopyToPlapack(B,B);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("A = ",luA->A,"%g ","");CHKERRQ(ierr);
  ierr = PLA_Global_show("B = ",luB->A,"%g ","");CHKERRQ(ierr);
  */

  /* do the multiply in PLA: alpha = 1 and beta = 0 distributed constants */
  ierr = PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);CHKERRQ(ierr);
  ierr = PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);CHKERRQ(ierr);
  CHKMEMQ;

  /* error return deliberately not checked here (see commented CHKERRQ) */
  ierr = PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* CHKERRQ(ierr); */
  CHKMEMQ;
  ierr = PLA_Obj_free(&alpha);CHKERRQ(ierr);
  ierr = PLA_Obj_free(&beta);CHKERRQ(ierr);

  /*
  ierr = PLA_Global_show("C = ",luC->A,"%g ","");CHKERRQ(ierr);
  */
  ierr = MatMPIDenseCopyFromPlapack(C,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1114 
#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIDense"
/*
   MatMatMultSymbolic_MPIDense_MPIDense - Symbolic stage of C = A*B.

   NOTE: currently disabled -- the unconditional SETERRQ below always
   raises an error (apparent PLAPACK bugs), so the creation code that
   follows it is unreachable but retained for when this is re-enabled.
*/
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  SETERRQ(((PetscObject)A)->comm,PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported");
  /* unreachable: would create C with A's row layout and B's column layout */
  ierr = MatCreate(((PetscObject)B)->comm,&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *C = Cmat;
  PetscFunctionReturn(0);
}
1135 
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIDense"
/* Driver for C = A*B: creates C symbolically on first use
   (MAT_INITIAL_MATRIX), then always runs the numeric product. */
PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatSolve_MPIDense"
1152 PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1153 {
1154   MPI_Comm       comm = ((PetscObject)A)->comm;
1155   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1156   PetscErrorCode ierr;
1157   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1158   PetscScalar    *array;
1159   PetscReal      one = 1.0;
1160   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;;
1161   PLA_Obj        v_pla = NULL;
1162   PetscScalar    *loc_buf;
1163   Vec            loc_x;
1164 
1165   PetscFunctionBegin;
1166   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1167   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1168 
1169   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1170   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1171   PLA_Obj_set_to_zero(v_pla);
1172 
1173   /* Copy b into rhs_pla */
1174   PLA_API_begin();
1175   PLA_Obj_API_open(v_pla);
1176   ierr = VecGetArray(b,&array);CHKERRQ(ierr);
1177   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1178   ierr = VecRestoreArray(b,&array);CHKERRQ(ierr);
1179   PLA_Obj_API_close(v_pla);
1180   PLA_API_end();
1181 
1182   if (A->factortype == MAT_FACTOR_LU){
1183     /* Apply the permutations to the right hand sides */
1184     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1185 
1186     /* Solve L y = b, overwriting b with y */
1187     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1188 
1189     /* Solve U x = y (=b), overwriting b with x */
1190     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1191   } else { /* MAT_FACTOR_CHOLESKY */
1192     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1193     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1194                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1195   }
1196 
1197   /* Copy PLAPACK x into Petsc vector x  */
1198   PLA_Obj_local_length(v_pla, &loc_m);
1199   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1200   PLA_Obj_local_stride(v_pla, &loc_stride);
1201   /*
1202     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1203   */
1204   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);CHKERRQ(ierr);
1205   if (!lu->pla_solved){
1206 
1207     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1208     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1209 
1210     /* Create IS and cts for VecScatterring */
1211     PLA_Obj_local_length(v_pla, &loc_m);
1212     PLA_Obj_local_stride(v_pla, &loc_stride);
1213     ierr = PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);CHKERRQ(ierr);
1214 
1215     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1216     for (i=0; i<loc_m; i+=lu->nb){
1217       j = 0;
1218       while (j < lu->nb && i+j < loc_m){
1219         idx_petsc[i+j] = rstart + j; j++;
1220       }
1221       rstart += size*lu->nb;
1222     }
1223 
1224     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1225 
1226     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,PETSC_COPY_VALUES,&lu->is_pla);CHKERRQ(ierr);
1227     ierr = ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,PETSC_COPY_VALUES,&lu->is_petsc);CHKERRQ(ierr);
1228     ierr = PetscFree2(idx_pla,idx_petsc);CHKERRQ(ierr);
1229     ierr = VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);CHKERRQ(ierr);
1230   }
1231   ierr = VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   ierr = VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233 
1234   /* Free data */
1235   ierr = VecDestroy(loc_x);CHKERRQ(ierr);
1236   PLA_Obj_free(&v_pla);
1237 
1238   lu->pla_solved = PETSC_TRUE;
1239   PetscFunctionReturn(0);
1240 }
1241 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIDense"
/*
   MatLUFactorNumeric_MPIDense - Numeric LU factorization via PLAPACK.

   (Re)creates the PLAPACK matrix and pivot objects, copies A into the
   PLAPACK object, and runs PLA_LU, which overwrites it with L (unit lower
   triangle) and U.  Installs MatSolve_MPIDense on the factor F.
*/
PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* refactorization: release the objects from the previous factorization */
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
    PLA_Obj_free (&lu->pivots);
  }
  /* Create PLAPACK matrix object */
  lu->A = NULL; lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
  PLA_Obj_set_to_zero(lu->A);
  PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
  info_pla = PLA_LU(lu->A,lu->pivots);
  if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;
  F->ops->solve      = MatSolve_MPIDense;
  F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}
1283 
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorNumeric_MPIDense"
/*
   MatCholeskyFactorNumeric_MPIDense - Numeric Cholesky factorization via
   PLAPACK (lower-triangular variant, no pivot object needed).

   Mirrors MatLUFactorNumeric_MPIDense: (re)creates the PLAPACK object,
   copies A into it, runs PLA_Chol, and installs MatSolve_MPIDense.
*/
PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
  PetscErrorCode ierr;
  PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
  PetscInt       info_pla=0;
  PetscScalar    *array,one = 1.0;

  PetscFunctionBegin;
  /* refactorization: release the object from the previous factorization */
  if (lu->mstruct == SAME_NONZERO_PATTERN){
    PLA_Obj_free(&lu->A);
  }
  /* Create PLAPACK matrix object */
  lu->A      = NULL;
  lu->pivots = NULL;
  PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

  /* Copy A into lu->A */
  PLA_API_begin();
  PLA_Obj_API_open(lu->A);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetArray(A,&array);CHKERRQ(ierr);
  PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
  ierr = MatRestoreArray(A,&array);CHKERRQ(ierr);
  PLA_Obj_API_close(lu->A);
  PLA_API_end();

  /* Factor P A -> Chol */
  info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
  if (info_pla != 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

  lu->rstart         = rstart;
  lu->mstruct        = SAME_NONZERO_PATTERN;
  F->ops->solve      = MatSolve_MPIDense;
  F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
  PetscFunctionReturn(0);
}
1323 
/* Note the Petsc perm permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_MPIDense"
/* Symbolic Cholesky: PLAPACK needs no symbolic phase, so this only checks
   that the matrix was flagged MAT_SYMMETRIC and installs the numeric
   factorization routine on F. */
PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscBool      issymmetric,set;

  PetscFunctionBegin;
  ierr = MatIsSymmetricKnown(A,&set,&issymmetric);CHKERRQ(ierr);
  if (!set || !issymmetric) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
  F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}
1338 
/* Note the Petsc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_MPIDense"
/* Symbolic LU: PLAPACK needs no symbolic phase; allocate the pivot vector
   and install the numeric factorization routine on F. */
PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N;
  Mat_Plapack    *lu;

  PetscFunctionBegin;
  lu = (Mat_Plapack*)F->spptr;
  ierr = PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);CHKERRQ(ierr);
  F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
  PetscFunctionReturn(0);
}
1354 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_mpidense_plapack"
/* Reports the solver package (PLAPACK) that produced this factor matrix. */
PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERPLAPACK;
  PetscFunctionReturn(0);
}
EXTERN_C_END
1365 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_plapack"
/*
   MatGetFactor_mpidense_plapack - Creates the factor matrix F used for
   PLAPACK LU or Cholesky factorization of an MPI dense matrix.

   Sets up the Mat_Plapack context: the template block size nb (default
   M/size rounded up, overridable with -mat_plapack_nb), the PLAPACK
   distribution template, the MPI datatype, and the PLAPACK matrix object;
   then installs the symbolic routine for the requested factor type.
*/
PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;
  Mat_Plapack    *lu;
  PetscMPIInt    size;
  PetscInt       M=A->rmap->N;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,F);CHKERRQ(ierr);
  ierr = MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(*F,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = PetscNewLog(*F,Mat_Plapack,&lu);CHKERRQ(ierr);
  (*F)->spptr = (void*)lu;

  /* Set default Plapack parameters */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  lu->nb = M/size;
  if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */

  /* Set runtime options */
  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);CHKERRQ(ierr);
  PetscOptionsEnd();

  /* Create object distribution template */
  lu->templ = NULL;
  ierr = PLA_Temp_create(lu->nb, 0, &lu->templ);CHKERRQ(ierr);

  /* Set the datatype */
#if defined(PETSC_USE_COMPLEX)
  lu->datatype = MPI_DOUBLE_COMPLEX;
#else
  lu->datatype = MPI_DOUBLE;
#endif

  ierr = PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);CHKERRQ(ierr);


  lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
  lu->mstruct        = DIFFERENT_NONZERO_PATTERN;

  if (ftype == MAT_FACTOR_LU) {
    (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
  } else if (ftype == MAT_FACTOR_CHOLESKY) {
    (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
  } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
  (*F)->factortype = ftype;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1421 #endif
1422 
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpidense_petsc"
/*
   MatGetFactor_mpidense_petsc - "petsc" solver-package entry point for
   MPI dense matrices.  There is no built-in parallel dense factorization,
   so this forwards to the PLAPACK factor when PETSc was configured with
   PLAPACK and raises an error otherwise.
*/
PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
{
#if defined(PETSC_HAVE_PLAPACK)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined(PETSC_HAVE_PLAPACK)
  ierr = MatGetFactor_mpidense_plapack(A,ftype,F);CHKERRQ(ierr);
#else
  SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
#endif
  PetscFunctionReturn(0);
}
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatAXPY_MPIDense"
1442 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1443 {
1444   PetscErrorCode ierr;
1445   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatConjugate_MPIDense"
1454 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIDense(Mat mat)
1455 {
1456   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;
1457   PetscErrorCode ierr;
1458 
1459   PetscFunctionBegin;
1460   ierr = MatConjugate(a->A);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatRealPart_MPIDense"
1466 PetscErrorCode MatRealPart_MPIDense(Mat A)
1467 {
1468   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1469   PetscErrorCode ierr;
1470 
1471   PetscFunctionBegin;
1472   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatImaginaryPart_MPIDense"
1478 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1479 {
1480   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1481   PetscErrorCode ierr;
1482 
1483   PetscFunctionBegin;
1484   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 /* -------------------------------------------------------------------*/
1489 static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1490        MatGetRow_MPIDense,
1491        MatRestoreRow_MPIDense,
1492        MatMult_MPIDense,
1493 /* 4*/ MatMultAdd_MPIDense,
1494        MatMultTranspose_MPIDense,
1495        MatMultTransposeAdd_MPIDense,
1496        0,
1497        0,
1498        0,
1499 /*10*/ 0,
1500        0,
1501        0,
1502        0,
1503        MatTranspose_MPIDense,
1504 /*15*/ MatGetInfo_MPIDense,
1505        MatEqual_MPIDense,
1506        MatGetDiagonal_MPIDense,
1507        MatDiagonalScale_MPIDense,
1508        MatNorm_MPIDense,
1509 /*20*/ MatAssemblyBegin_MPIDense,
1510        MatAssemblyEnd_MPIDense,
1511        MatSetOption_MPIDense,
1512        MatZeroEntries_MPIDense,
1513 /*24*/ MatZeroRows_MPIDense,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*29*/ MatSetUpPreallocation_MPIDense,
1519        0,
1520        0,
1521        MatGetArray_MPIDense,
1522        MatRestoreArray_MPIDense,
1523 /*34*/ MatDuplicate_MPIDense,
1524        0,
1525        0,
1526        0,
1527        0,
1528 /*39*/ MatAXPY_MPIDense,
1529        MatGetSubMatrices_MPIDense,
1530        0,
1531        MatGetValues_MPIDense,
1532        0,
1533 /*44*/ 0,
1534        MatScale_MPIDense,
1535        0,
1536        0,
1537        0,
1538 /*49*/ 0,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*54*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*59*/ MatGetSubMatrix_MPIDense,
1549        MatDestroy_MPIDense,
1550        MatView_MPIDense,
1551        0,
1552        0,
1553 /*64*/ 0,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*69*/ 0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*74*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*79*/ 0,
1569        0,
1570        0,
1571        0,
1572 /*83*/ MatLoad_MPIDense,
1573        0,
1574        0,
1575        0,
1576        0,
1577        0,
1578 /*89*/
1579 #if defined(PETSC_HAVE_PLAPACK)
1580        MatMatMult_MPIDense_MPIDense,
1581        MatMatMultSymbolic_MPIDense_MPIDense,
1582        MatMatMultNumeric_MPIDense_MPIDense,
1583 #else
1584        0,
1585        0,
1586        0,
1587 #endif
1588        0,
1589        0,
1590 /*94*/ 0,
1591        0,
1592        0,
1593        0,
1594        0,
1595 /*99*/ 0,
1596        0,
1597        0,
1598        MatConjugate_MPIDense,
1599        0,
1600 /*104*/0,
1601        MatRealPart_MPIDense,
1602        MatImaginaryPart_MPIDense,
1603        0,
1604        0,
1605 /*109*/0,
1606        0,
1607        0,
1608        0,
1609        0,
1610 /*114*/0,
1611        0,
1612        0,
1613        0,
1614        0,
1615 /*119*/0,
1616        0,
1617        0,
1618        0
1619 };
1620 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIDenseSetPreallocation_MPIDense"
/*
   MatMPIDenseSetPreallocation_MPIDense - Sets up the storage of a parallel
   dense matrix: finalizes the row/column layouts and creates the inner
   sequential dense matrix a->A that holds the local rows but ALL global
   columns.  If data is non-null it is used as the local storage array.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a    = (Mat_MPIDense*)mat->data;
  ierr = PetscLayoutSetBlockSize(mat->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(mat->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* the inner matrix is mat->rmap->n by mat->cmap->N (all columns local) */
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
1649 
1650 /*MC
1651    MATSOLVERPLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1652 
1653   run ./configure with the option --download-plapack
1654 
1655 
1656   Options Database Keys:
1657 . -mat_plapack_nprows <n> - number of rows in processor partition
1658 . -mat_plapack_npcols <n> - number of columns in processor partition
1659 . -mat_plapack_nb <n> - block size of template vector
1660 . -mat_plapack_nb_alg <n> - algorithmic block size
1661 - -mat_plapack_ckerror <n> - error checking flag
1662 
1663    Level: intermediate
1664 
1665 .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1666 
1667 M*/
1668 
1669 EXTERN_C_BEGIN
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatCreate_MPIDense"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Constructor for MATMPIDENSE: allocates the per-matrix data structure,
     installs the operations table, and registers the composed functions that
     other components (MatMatMult dispatch, MatGetFactor) look up by name. */
  ierr              = PetscNewLog(mat,Mat_MPIDense,&a);CHKERRQ(ierr);
  mat->data         = (void*)a;
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->mapping      = 0;

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;
  ierr = MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply; created lazily by
     MatSetUpMultiply_MPIDense() */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* Composed functions: resolved at runtime by string name so that code in
     other files can call these type-specific implementations without a
     compile-time dependency on this one. */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIDense",
                                     MatGetDiagonalBlock_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
                                     "MatMPIDenseSetPreallocation_MPIDense",
                                     MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
                                     "MatMatMult_MPIAIJ_MPIDense",
                                      MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
                                     "MatMatMultSymbolic_MPIAIJ_MPIDense",
                                      MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
                                     "MatMatMultNumeric_MPIAIJ_MPIDense",
                                      MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
                                     "MatGetFactor_mpidense_petsc",
                                      MatGetFactor_mpidense_petsc);CHKERRQ(ierr);
#if defined(PETSC_HAVE_PLAPACK)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
                                     "MatGetFactor_mpidense_plapack",
                                      MatGetFactor_mpidense_plapack);CHKERRQ(ierr);
  /* PLAPACK keeps global state; initialize it once, on first MPIDense creation */
  ierr = PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);CHKERRQ(ierr);
#endif
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1724 EXTERN_C_END
1725 
1726 /*MC
1727    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1728 
1729    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1730    and MATMPIDENSE otherwise.
1731 
1732    Options Database Keys:
1733 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1734 
1735   Level: beginner
1736 
1737 
1738 .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1739 M*/
1740 
1741 EXTERN_C_BEGIN
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatCreate_Dense"
1744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_Dense(Mat A)
1745 {
1746   PetscErrorCode ierr;
1747   PetscMPIInt    size;
1748 
1749   PetscFunctionBegin;
1750   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1751   if (size == 1) {
1752     ierr = MatSetType(A,MATSEQDENSE);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
1755   }
1756   PetscFunctionReturn(0);
1757 }
1758 EXTERN_C_END
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatMPIDenseSetPreallocation"
1762 /*@C
1763    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1764 
1765    Not collective
1766 
1767    Input Parameters:
1768 +  A - the matrix
1769 -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1770    to control all matrix memory allocation.
1771 
1772    Notes:
1773    The dense format is fully compatible with standard Fortran 77
1774    storage by columns.
1775 
1776    The data input variable is intended primarily for Fortran programmers
1777    who wish to allocate their own matrix memory space.  Most users should
1778    set data=PETSC_NULL.
1779 
1780    Level: intermediate
1781 
1782 .keywords: matrix,dense, parallel
1783 
1784 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1785 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatches to the type-specific implementation composed under
     "MatMPIDenseSetPreallocation_C" (MatMPIDenseSetPreallocation_MPIDense for
     MATMPIDENSE); PetscTryMethod is a no-op if the method is not composed,
     so calling this on a non-MPIDense matrix is harmless. */
  ierr = PetscTryMethod(mat,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar *),(mat,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1794 
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatCreateMPIDense"
1797 /*@C
1798    MatCreateMPIDense - Creates a parallel matrix in dense format.
1799 
1800    Collective on MPI_Comm
1801 
1802    Input Parameters:
1803 +  comm - MPI communicator
1804 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1805 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1806 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1807 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1808 -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1809    to control all matrix memory allocation.
1810 
1811    Output Parameter:
1812 .  A - the matrix
1813 
1814    Notes:
1815    The dense format is fully compatible with standard Fortran 77
1816    storage by columns.
1817 
1818    The data input variable is intended primarily for Fortran programmers
1819    who wish to allocate their own matrix memory space.  Most users should
1820    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1821 
1822    The user MUST specify either the local or global matrix dimensions
1823    (possibly both).
1824 
1825    Level: intermediate
1826 
1827 .keywords: matrix,dense, parallel
1828 
1829 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1830 @*/
1831 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1832 {
1833   PetscErrorCode ierr;
1834   PetscMPIInt    size;
1835 
1836   PetscFunctionBegin;
1837   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1838   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1839   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1840   if (size > 1) {
1841     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1842     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1843   } else {
1844     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1845     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1846   }
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatDuplicate_MPIDense"
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Creates a new parallel dense matrix with the same type, communicator and
     layout as A; cpvalues is forwarded to MatDuplicate() on the local
     sequential block below, which decides whether values are copied. */
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)A)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a                 = (Mat_MPIDense*)mat->data;
  /* install A's (possibly user-modified) operations table, not the default one */
  ierr              = PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  mat->insertmode   = NOT_SET_VALUES;
  a->nvec           = oldmat->nvec;
  a->donotstash     = oldmat->donotstash;

  /* NOTE(review): MatSetType() above already built layouts for mat; presumably
     PetscLayoutCopy() releases/replaces them without leaking -- confirm. */
  ierr = PetscLayoutCopy(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* scatter context for MatMult must exist before the matrix is usable */
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
1886 
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatLoad_MPIDense_DenseInFile"
1889 PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N,Mat newmat,PetscInt sizesset)
1890 {
1891   PetscErrorCode ierr;
1892   PetscMPIInt    rank,size;
1893   PetscInt       *rowners,i,m,nz,j;
1894   PetscScalar    *array,*vals,*vals_ptr;
1895   MPI_Status     status;
1896 
1897   PetscFunctionBegin;
1898   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1900 
1901   /* determine ownership of all rows */
1902   if (newmat->rmap->n < 0) m          = M/size + ((M % size) > rank);
1903   else m = newmat->rmap->n;
1904   ierr       = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1905   ierr       = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1906   rowners[0] = 0;
1907   for (i=2; i<=size; i++) {
1908     rowners[i] += rowners[i-1];
1909   }
1910 
1911   if (!sizesset) {
1912     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
1913   }
1914   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
1915   ierr = MatGetArray(newmat,&array);CHKERRQ(ierr);
1916 
1917   if (!rank) {
1918     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1919 
1920     /* read in my part of the matrix numerical values  */
1921     ierr = PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);CHKERRQ(ierr);
1922 
1923     /* insert into matrix-by row (this is why cannot directly read into array */
1924     vals_ptr = vals;
1925     for (i=0; i<m; i++) {
1926       for (j=0; j<N; j++) {
1927         array[i + j*m] = *vals_ptr++;
1928       }
1929     }
1930 
1931     /* read in other processors and ship out */
1932     for (i=1; i<size; i++) {
1933       nz   = (rowners[i+1] - rowners[i])*N;
1934       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1935       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(newmat))->tag,comm);CHKERRQ(ierr);
1936     }
1937   } else {
1938     /* receive numeric values */
1939     ierr = PetscMalloc(m*N*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1940 
1941     /* receive message of values*/
1942     ierr = MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(newmat))->tag,comm,&status);CHKERRQ(ierr);
1943 
1944     /* insert into matrix-by row (this is why cannot directly read into array */
1945     vals_ptr = vals;
1946     for (i=0; i<m; i++) {
1947       for (j=0; j<N; j++) {
1948         array[i + j*m] = *vals_ptr++;
1949       }
1950     }
1951   }
1952   ierr = PetscFree(rowners);CHKERRQ(ierr);
1953   ierr = PetscFree(vals);CHKERRQ(ierr);
1954   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1955   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatLoad_MPIDense"
1961 PetscErrorCode MatLoad_MPIDense(Mat newmat,PetscViewer viewer)
1962 {
1963   PetscScalar    *vals,*svals;
1964   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1965   MPI_Status     status;
1966   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1967   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1968   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1969   PetscInt       i,nz,j,rstart,rend,sizesset=1,grows,gcols;
1970   int            fd;
1971   PetscErrorCode ierr;
1972 
1973   PetscFunctionBegin;
1974   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1975   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1976   if (!rank) {
1977     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1978     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1979     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1980   }
1981   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
1982 
1983   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1984   M = header[1]; N = header[2]; nz = header[3];
1985 
1986   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
1987   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
1988   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
1989 
1990   /* If global sizes are set, check if they are consistent with that given in the file */
1991   if (sizesset) {
1992     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
1993   }
1994   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
1995   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
1996 
1997   /*
1998        Handle case where matrix is stored on disk as a dense matrix
1999   */
2000   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
2001     ierr = MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat,sizesset);CHKERRQ(ierr);
2002     PetscFunctionReturn(0);
2003   }
2004 
2005   /* determine ownership of all rows */
2006   if (newmat->rmap->n < 0) m          = PetscMPIIntCast(M/size + ((M % size) > rank));
2007   else m = PetscMPIIntCast(newmat->rmap->n);
2008   ierr       = PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2009   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2010   rowners[0] = 0;
2011   for (i=2; i<=size; i++) {
2012     rowners[i] += rowners[i-1];
2013   }
2014   rstart = rowners[rank];
2015   rend   = rowners[rank+1];
2016 
2017   /* distribute row lengths to all processors */
2018   ierr    = PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);CHKERRQ(ierr);
2019   if (!rank) {
2020     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2021     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2022     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2023     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2024     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2025     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2026   } else {
2027     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2028   }
2029 
2030   if (!rank) {
2031     /* calculate the number of nonzeros on each processor */
2032     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2033     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2034     for (i=0; i<size; i++) {
2035       for (j=rowners[i]; j< rowners[i+1]; j++) {
2036         procsnz[i] += rowlengths[j];
2037       }
2038     }
2039     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2040 
2041     /* determine max buffer needed and allocate it */
2042     maxnz = 0;
2043     for (i=0; i<size; i++) {
2044       maxnz = PetscMax(maxnz,procsnz[i]);
2045     }
2046     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2047 
2048     /* read in my part of the matrix column indices  */
2049     nz = procsnz[0];
2050     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2051     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2052 
2053     /* read in every one elses and ship off */
2054     for (i=1; i<size; i++) {
2055       nz   = procsnz[i];
2056       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2057       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2058     }
2059     ierr = PetscFree(cols);CHKERRQ(ierr);
2060   } else {
2061     /* determine buffer space needed for message */
2062     nz = 0;
2063     for (i=0; i<m; i++) {
2064       nz += ourlens[i];
2065     }
2066     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2067 
2068     /* receive message of column indices*/
2069     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2070     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2071     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2072   }
2073 
2074   /* loop over local rows, determining number of off diagonal entries */
2075   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2076   jj = 0;
2077   for (i=0; i<m; i++) {
2078     for (j=0; j<ourlens[i]; j++) {
2079       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2080       jj++;
2081     }
2082   }
2083 
2084   /* create our matrix */
2085   for (i=0; i<m; i++) {
2086     ourlens[i] -= offlens[i];
2087   }
2088 
2089   if (!sizesset) {
2090     ierr = MatSetSizes(newmat,m,PETSC_DECIDE,M,N);CHKERRQ(ierr);
2091   }
2092   ierr = MatMPIDenseSetPreallocation(newmat,PETSC_NULL);CHKERRQ(ierr);
2093   for (i=0; i<m; i++) {
2094     ourlens[i] += offlens[i];
2095   }
2096 
2097   if (!rank) {
2098     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2099 
2100     /* read in my part of the matrix numerical values  */
2101     nz = procsnz[0];
2102     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2103 
2104     /* insert into matrix */
2105     jj      = rstart;
2106     smycols = mycols;
2107     svals   = vals;
2108     for (i=0; i<m; i++) {
2109       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2110       smycols += ourlens[i];
2111       svals   += ourlens[i];
2112       jj++;
2113     }
2114 
2115     /* read in other processors and ship out */
2116     for (i=1; i<size; i++) {
2117       nz   = procsnz[i];
2118       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2119       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2120     }
2121     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2122   } else {
2123     /* receive numeric values */
2124     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2125 
2126     /* receive message of values*/
2127     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2128     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2129     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2130 
2131     /* insert into matrix */
2132     jj      = rstart;
2133     smycols = mycols;
2134     svals   = vals;
2135     for (i=0; i<m; i++) {
2136       ierr = MatSetValues(newmat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2137       smycols += ourlens[i];
2138       svals   += ourlens[i];
2139       jj++;
2140     }
2141   }
2142   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2143   ierr = PetscFree(vals);CHKERRQ(ierr);
2144   ierr = PetscFree(mycols);CHKERRQ(ierr);
2145   ierr = PetscFree(rowners);CHKERRQ(ierr);
2146 
2147   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatEqual_MPIDense"
2154 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2155 {
2156   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2157   Mat            a,b;
2158   PetscBool      flg;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   a = matA->A;
2163   b = matB->A;
2164   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2165   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #if defined(PETSC_HAVE_PLAPACK)
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "PetscPLAPACKFinalizePackage"
2173 /*@C
2174   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2175   Level: developer
2176 
2177 .keywords: Petsc, destroy, package, PLAPACK
2178 .seealso: PetscFinalize()
2179 @*/
PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKFinalizePackage(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Registered via PetscRegisterFinalize() by PetscPLAPACKInitializePackage();
     tears down PLAPACK's global state at PetscFinalize() time. */
  ierr = PLA_Finalize();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "PetscPLAPACKInitializePackage"
2191 /*@C
2192   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2193   called from MatCreate_MPIDense() the first time an MPI dense matrix is called.
2194 
2195   Input Parameter:
2196 .   comm - the communicator the matrix lives on
2197 
2198   Level: developer
2199 
2200    Notes: PLAPACK does not have a good fit with MPI communicators; all (parallel) PLAPACK objects have to live in the
2201   same communicator (because there is some global state that is initialized and used for all matrices). In addition if
2202   PLAPACK is initialized (that is the initial matrices created) are on subcommunicators of MPI_COMM_WORLD, these subcommunicators
2203   cannot overlap.
2204 
2205 .keywords: Petsc, initialize, package, PLAPACK
2206 .seealso: PetscSysInitializePackage(), PetscInitialize()
2207 @*/
PetscErrorCode PETSCMAT_DLLEXPORT PetscPLAPACKInitializePackage(MPI_Comm comm)
{
  PetscMPIInt    size;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Initialize PLAPACK's global state exactly once; subsequent calls (from
     later matrix creations, possibly on other communicators) are no-ops. */
  if (!PLA_Initialized(PETSC_NULL)) {

    /* default processor mesh: 1 row by <size> columns, overridable below */
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    Plapack_nprows = 1;
    Plapack_npcols = size;

    ierr = PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);CHKERRQ(ierr);
      /* default error-checking level: full (3) in debug builds, off otherwise */
#if defined(PETSC_USE_DEBUG)
      Plapack_ierror = 3;
#else
      Plapack_ierror = 0;
#endif
      ierr = PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_ierror){
	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );CHKERRQ(ierr);
      } else {
	ierr = PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );CHKERRQ(ierr);
      }

      /* 0 means "leave PLAPACK's built-in algorithmic block size alone" */
      Plapack_nb_alg = 0;
      ierr = PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);CHKERRQ(ierr);
      if (Plapack_nb_alg) {
        ierr = pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);CHKERRQ(ierr);
      }
    PetscOptionsEnd();

    /* map the 1D communicator onto the nprows x npcols mesh and start PLAPACK */
    ierr = PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PLA_Init(Plapack_comm_2d);CHKERRQ(ierr);
    ierr = PetscRegisterFinalize(PetscPLAPACKFinalizePackage);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2248 
2249 #endif
2250