xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 57715deb645f54fa2d0f094b2765b18b9347b2aa)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
32   if (flg) *B = mat->A;
33   else *B = A;
34   PetscFunctionReturn(0);
35 }
36 
37 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
38 {
39   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
40   PetscErrorCode ierr;
41   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
42 
43   PetscFunctionBegin;
44   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
45   lrow = row - rstart;
46   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
51 {
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
56   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
61 {
62   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
63   PetscErrorCode ierr;
64   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
65   PetscScalar    *array;
66   MPI_Comm       comm;
67   Mat            B;
68 
69   PetscFunctionBegin;
70   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
71 
72   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
73   if (!B) {
74     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
75     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
76     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
77     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
78     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
79     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
80     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
81     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
82     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
84     *a   = B;
85     ierr = MatDestroy(&B);CHKERRQ(ierr);
86   } else *a = B;
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
91 {
92   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
93   PetscErrorCode ierr;
94   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
95   PetscBool      roworiented = A->roworiented;
96 
97   PetscFunctionBegin;
98   for (i=0; i<m; i++) {
99     if (idxm[i] < 0) continue;
100     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
101     if (idxm[i] >= rstart && idxm[i] < rend) {
102       row = idxm[i] - rstart;
103       if (roworiented) {
104         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
105       } else {
106         for (j=0; j<n; j++) {
107           if (idxn[j] < 0) continue;
108           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
109           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
110         }
111       }
112     } else if (!A->donotstash) {
113       mat->assembled = PETSC_FALSE;
114       if (roworiented) {
115         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
116       } else {
117         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
118       }
119     }
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
125 {
126   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
127   PetscErrorCode ierr;
128   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
129 
130   PetscFunctionBegin;
131   for (i=0; i<m; i++) {
132     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
133     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
134     if (idxm[i] >= rstart && idxm[i] < rend) {
135       row = idxm[i] - rstart;
136       for (j=0; j<n; j++) {
137         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
138         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
139         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
140       }
141     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
142   }
143   PetscFunctionReturn(0);
144 }
145 
146 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
147 {
148   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
149   PetscErrorCode ierr;
150 
151   PetscFunctionBegin;
152   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
157 {
158   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
159   PetscErrorCode ierr;
160 
161   PetscFunctionBegin;
162   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
167 {
168   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
169   PetscErrorCode ierr;
170 
171   PetscFunctionBegin;
172   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
173   PetscFunctionReturn(0);
174 }
175 
176 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
177 {
178   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
179   PetscErrorCode ierr;
180 
181   PetscFunctionBegin;
182   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
193   PetscFunctionReturn(0);
194 }
195 
196 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
197 {
198   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
199   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
200   PetscErrorCode ierr;
201   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
202   const PetscInt *irow,*icol;
203   PetscScalar    *av,*bv,*v = lmat->v;
204   Mat            newmat;
205   IS             iscol_local;
206   MPI_Comm       comm_is,comm_mat;
207 
208   PetscFunctionBegin;
209   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
210   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
211   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
212 
213   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
214   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
215   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
216   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
217   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
218   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
219 
220   /* No parallel redistribution currently supported! Should really check each index set
221      to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
222      original matrix! */
223 
224   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
225   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
226 
227   /* Check submatrix call */
228   if (scall == MAT_REUSE_MATRIX) {
229     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
230     /* Really need to test rows and column sizes! */
231     newmat = *B;
232   } else {
233     /* Create and fill new matrix */
234     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
235     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
236     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
237     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
238   }
239 
240   /* Now extract the data pointers and do the copy, column at a time */
241   newmatd = (Mat_MPIDense*)newmat->data;
242   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
243 
244   for (i=0; i<Ncols; i++) {
245     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
246     for (j=0; j<nrows; j++) {
247       *bv++ = av[irow[j] - rstart];
248     }
249   }
250 
251   /* Assemble the matrices so that the correct flags are set */
252   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
253   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
254 
255   /* Free work space */
256   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
257   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
258   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
259   *B   = newmat;
260   PetscFunctionReturn(0);
261 }
262 
263 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
264 {
265   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
266   PetscErrorCode ierr;
267 
268   PetscFunctionBegin;
269   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
274 {
275   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
276   PetscErrorCode ierr;
277 
278   PetscFunctionBegin;
279   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
283 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
284 {
285   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
286   MPI_Comm       comm;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
293   /* make sure all processors are either in INSERTMODE or ADDMODE */
294   ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
295   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
296   mat->insertmode = addv; /* in case this processor had no cache */
297 
298   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
299   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
300   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
305 {
306   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
307   PetscErrorCode ierr;
308   PetscInt       i,*row,*col,flg,j,rstart,ncols;
309   PetscMPIInt    n;
310   PetscScalar    *val;
311 
312   PetscFunctionBegin;
313   /*  wait on receives */
314   while (1) {
315     ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
316     if (!flg) break;
317 
318     for (i=0; i<n;) {
319       /* Now identify the consecutive vals belonging to the same row */
320       for (j=i,rstart=row[j]; j<n; j++) {
321         if (row[j] != rstart) break;
322       }
323       if (j < n) ncols = j-i;
324       else       ncols = n-i;
325       /* Now assemble all these values with a single function call */
326       ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
327       i    = j;
328     }
329   }
330   ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
331 
332   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
333   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
334 
335   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
336     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
337   }
338   PetscFunctionReturn(0);
339 }
340 
341 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
342 {
343   PetscErrorCode ierr;
344   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
345 
346   PetscFunctionBegin;
347   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /* the code does not do the diagonal entries correctly unless the
352    matrix is square and the column and row owerships are identical.
353    This is a BUG. The only way to fix it seems to be to access
354    mdn->A and mdn->B directly and not through the MatZeroRows()
355    routine.
356 */
357 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
358 {
359   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
360   PetscErrorCode    ierr;
361   PetscInt          i,*owners = A->rmap->range;
362   PetscInt          *sizes,j,idx,nsends;
363   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
364   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
365   PetscInt          *lens,*lrows,*values;
366   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
367   MPI_Comm          comm;
368   MPI_Request       *send_waits,*recv_waits;
369   MPI_Status        recv_status,*send_status;
370   PetscBool         found;
371   const PetscScalar *xx;
372   PetscScalar       *bb;
373 
374   PetscFunctionBegin;
375   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
376   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
377   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
378   /*  first count number of contributors to each processor */
379   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
380   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
381   for (i=0; i<N; i++) {
382     idx   = rows[i];
383     found = PETSC_FALSE;
384     for (j=0; j<size; j++) {
385       if (idx >= owners[j] && idx < owners[j+1]) {
386         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
387       }
388     }
389     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
390   }
391   nsends = 0;
392   for (i=0; i<size; i++) nsends += sizes[2*i+1];
393 
394   /* inform other processors of number of messages and max length*/
395   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
396 
397   /* post receives:   */
398   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
399   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
400   for (i=0; i<nrecvs; i++) {
401     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
402   }
403 
404   /* do sends:
405       1) starts[i] gives the starting index in svalues for stuff going to
406          the ith processor
407   */
408   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
409   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
410   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
411 
412   starts[0] = 0;
413   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
414   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
415 
416   starts[0] = 0;
417   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
418   count = 0;
419   for (i=0; i<size; i++) {
420     if (sizes[2*i+1]) {
421       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
422     }
423   }
424   ierr = PetscFree(starts);CHKERRQ(ierr);
425 
426   base = owners[rank];
427 
428   /*  wait on receives */
429   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
430   count = nrecvs;
431   slen  = 0;
432   while (count) {
433     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
434     /* unpack receives into our local space */
435     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
436 
437     source[imdex] = recv_status.MPI_SOURCE;
438     lens[imdex]   = n;
439     slen += n;
440     count--;
441   }
442   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
443 
444   /* move the data into the send scatter */
445   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
446   count = 0;
447   for (i=0; i<nrecvs; i++) {
448     values = rvalues + i*nmax;
449     for (j=0; j<lens[i]; j++) {
450       lrows[count++] = values[j] - base;
451     }
452   }
453   ierr = PetscFree(rvalues);CHKERRQ(ierr);
454   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
455   ierr = PetscFree(owner);CHKERRQ(ierr);
456   ierr = PetscFree(sizes);CHKERRQ(ierr);
457 
458   /* fix right hand side if needed */
459   if (x && b) {
460     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
461     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
462     for (i=0; i<slen; i++) {
463       bb[lrows[i]] = diag*xx[lrows[i]];
464     }
465     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
466     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
467   }
468 
469   /* actually zap the local rows */
470   ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
471   if (diag != 0.0) {
472     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
473     PetscInt     m   = ll->lda, i;
474 
475     for (i=0; i<slen; i++) {
476       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
477     }
478   }
479   ierr = PetscFree(lrows);CHKERRQ(ierr);
480 
481   /* wait on sends */
482   if (nsends) {
483     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
484     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
485     ierr = PetscFree(send_status);CHKERRQ(ierr);
486   }
487   ierr = PetscFree(send_waits);CHKERRQ(ierr);
488   ierr = PetscFree(svalues);CHKERRQ(ierr);
489   PetscFunctionReturn(0);
490 }
491 
492 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
493 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
494 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
495 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
496 
497 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
498 {
499   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
500   PetscErrorCode ierr;
501 
502   PetscFunctionBegin;
503   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
504   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
510 {
511   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
512   PetscErrorCode ierr;
513 
514   PetscFunctionBegin;
515   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
516   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
517   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
518   PetscFunctionReturn(0);
519 }
520 
521 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
522 {
523   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
524   PetscErrorCode ierr;
525   PetscScalar    zero = 0.0;
526 
527   PetscFunctionBegin;
528   ierr = VecSet(yy,zero);CHKERRQ(ierr);
529   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
530   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
531   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
532   PetscFunctionReturn(0);
533 }
534 
535 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
536 {
537   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
538   PetscErrorCode ierr;
539 
540   PetscFunctionBegin;
541   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
542   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
543   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
544   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 
548 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
549 {
550   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
551   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
552   PetscErrorCode ierr;
553   PetscInt       len,i,n,m = A->rmap->n,radd;
554   PetscScalar    *x,zero = 0.0;
555 
556   PetscFunctionBegin;
557   ierr = VecSet(v,zero);CHKERRQ(ierr);
558   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
559   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
560   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
561   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
562   radd = A->rmap->rstart*m;
563   for (i=0; i<len; i++) {
564     x[i] = aloc->v[radd + i*m + i];
565   }
566   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 PetscErrorCode MatDestroy_MPIDense(Mat mat)
571 {
572   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
573   PetscErrorCode ierr;
574 
575   PetscFunctionBegin;
576 #if defined(PETSC_USE_LOG)
577   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
578 #endif
579   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
580   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
581   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
582   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
583 
584   ierr = PetscFree(mat->data);CHKERRQ(ierr);
585   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
586 
587   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
590   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
592   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
593   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
594 #if defined(PETSC_HAVE_ELEMENTAL)
595   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
596 #endif
597   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
598   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
599   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
600   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
601   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
602   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
603   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
604   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
605   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
606   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
607   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
612 
613 #include <petscdraw.h>
614 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
615 {
616   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
617   PetscErrorCode    ierr;
618   PetscMPIInt       rank = mdn->rank;
619   PetscViewerType   vtype;
620   PetscBool         iascii,isdraw;
621   PetscViewer       sviewer;
622   PetscViewerFormat format;
623 
624   PetscFunctionBegin;
625   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
626   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
627   if (iascii) {
628     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
629     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
630     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
631       MatInfo info;
632       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
635       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
636       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
637       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_INFO) {
640       PetscFunctionReturn(0);
641     }
642   } else if (isdraw) {
643     PetscDraw draw;
644     PetscBool isnull;
645 
646     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
647     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
648     if (isnull) PetscFunctionReturn(0);
649   }
650 
651   {
652     /* assemble the entire matrix onto first processor. */
653     Mat         A;
654     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
655     PetscInt    *cols;
656     PetscScalar *vals;
657 
658     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
659     if (!rank) {
660       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
661     } else {
662       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
663     }
664     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
665     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
666     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
667     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
668 
669     /* Copy the matrix ... This isn't the most efficient means,
670        but it's quick for now */
671     A->insertmode = INSERT_VALUES;
672 
673     row = mat->rmap->rstart;
674     m   = mdn->A->rmap->n;
675     for (i=0; i<m; i++) {
676       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
677       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
678       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
679       row++;
680     }
681 
682     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
683     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
684     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
685     if (!rank) {
686       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
687       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
688     }
689     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
690     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
691     ierr = MatDestroy(&A);CHKERRQ(ierr);
692   }
693   PetscFunctionReturn(0);
694 }
695 
696 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
697 {
698   PetscErrorCode ierr;
699   PetscBool      iascii,isbinary,isdraw,issocket;
700 
701   PetscFunctionBegin;
702   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
703   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
704   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
705   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
706 
707   if (iascii || issocket || isdraw) {
708     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
709   } else if (isbinary) {
710     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
711   }
712   PetscFunctionReturn(0);
713 }
714 
715 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
716 {
717   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
718   Mat            mdn  = mat->A;
719   PetscErrorCode ierr;
720   PetscLogDouble isend[5],irecv[5];
721 
722   PetscFunctionBegin;
723   info->block_size = 1.0;
724 
725   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
726 
727   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
728   isend[3] = info->memory;  isend[4] = info->mallocs;
729   if (flag == MAT_LOCAL) {
730     info->nz_used      = isend[0];
731     info->nz_allocated = isend[1];
732     info->nz_unneeded  = isend[2];
733     info->memory       = isend[3];
734     info->mallocs      = isend[4];
735   } else if (flag == MAT_GLOBAL_MAX) {
736     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
737 
738     info->nz_used      = irecv[0];
739     info->nz_allocated = irecv[1];
740     info->nz_unneeded  = irecv[2];
741     info->memory       = irecv[3];
742     info->mallocs      = irecv[4];
743   } else if (flag == MAT_GLOBAL_SUM) {
744     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
745 
746     info->nz_used      = irecv[0];
747     info->nz_allocated = irecv[1];
748     info->nz_unneeded  = irecv[2];
749     info->memory       = irecv[3];
750     info->mallocs      = irecv[4];
751   }
752   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
753   info->fill_ratio_needed = 0;
754   info->factor_mallocs    = 0;
755   PetscFunctionReturn(0);
756 }
757 
758 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
759 {
760   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
761   PetscErrorCode ierr;
762 
763   PetscFunctionBegin;
764   switch (op) {
765   case MAT_NEW_NONZERO_LOCATIONS:
766   case MAT_NEW_NONZERO_LOCATION_ERR:
767   case MAT_NEW_NONZERO_ALLOCATION_ERR:
768     MatCheckPreallocated(A,1);
769     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
770     break;
771   case MAT_ROW_ORIENTED:
772     MatCheckPreallocated(A,1);
773     a->roworiented = flg;
774     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
775     break;
776   case MAT_NEW_DIAGONALS:
777   case MAT_KEEP_NONZERO_PATTERN:
778   case MAT_USE_HASH_TABLE:
779   case MAT_SORTED_FULL:
780     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
781     break;
782   case MAT_IGNORE_OFF_PROC_ENTRIES:
783     a->donotstash = flg;
784     break;
785   case MAT_SYMMETRIC:
786   case MAT_STRUCTURALLY_SYMMETRIC:
787   case MAT_HERMITIAN:
788   case MAT_SYMMETRY_ETERNAL:
789   case MAT_IGNORE_LOWER_TRIANGULAR:
790   case MAT_IGNORE_ZERO_ENTRIES:
791     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
792     break;
793   default:
794     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
795   }
796   PetscFunctionReturn(0);
797 }
798 
799 
800 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
801 {
802   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
803   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
804   const PetscScalar *l,*r;
805   PetscScalar       x,*v;
806   PetscErrorCode    ierr;
807   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
808 
809   PetscFunctionBegin;
810   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
811   if (ll) {
812     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
813     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
814     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
815     for (i=0; i<m; i++) {
816       x = l[i];
817       v = mat->v + i;
818       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
819     }
820     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
821     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
822   }
823   if (rr) {
824     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
825     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
826     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
829     for (i=0; i<n; i++) {
830       x = r[i];
831       v = mat->v + i*m;
832       for (j=0; j<m; j++) (*v++) *= x;
833     }
834     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
835     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
836   }
837   PetscFunctionReturn(0);
838 }
839 
840 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
841 {
842   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
843   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
844   PetscErrorCode ierr;
845   PetscInt       i,j;
846   PetscReal      sum = 0.0;
847   PetscScalar    *v  = mat->v;
848 
849   PetscFunctionBegin;
850   if (mdn->size == 1) {
851     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
852   } else {
853     if (type == NORM_FROBENIUS) {
854       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
855         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
856       }
857       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
858       *nrm = PetscSqrtReal(*nrm);
859       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
860     } else if (type == NORM_1) {
861       PetscReal *tmp,*tmp2;
862       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
863       *nrm = 0.0;
864       v    = mat->v;
865       for (j=0; j<mdn->A->cmap->n; j++) {
866         for (i=0; i<mdn->A->rmap->n; i++) {
867           tmp[j] += PetscAbsScalar(*v);  v++;
868         }
869       }
870       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
871       for (j=0; j<A->cmap->N; j++) {
872         if (tmp2[j] > *nrm) *nrm = tmp2[j];
873       }
874       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
875       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
876     } else if (type == NORM_INFINITY) { /* max row norm */
877       PetscReal ntemp;
878       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
879       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
880     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
881   }
882   PetscFunctionReturn(0);
883 }
884 
885 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
886 {
887   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
888   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
889   Mat            B;
890   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
891   PetscErrorCode ierr;
892   PetscInt       j,i;
893   PetscScalar    *v;
894 
895   PetscFunctionBegin;
896   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
897     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
898     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
899     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
900     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
901   } else {
902     B = *matout;
903   }
904 
905   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
906   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
907   for (i=0; i<m; i++) rwork[i] = rstart + i;
908   for (j=0; j<n; j++) {
909     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
910     v   += m;
911   }
912   ierr = PetscFree(rwork);CHKERRQ(ierr);
913   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
914   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
915   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
916     *matout = B;
917   } else {
918     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
919   }
920   PetscFunctionReturn(0);
921 }
922 
923 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
924 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
925 
926 PetscErrorCode MatSetUp_MPIDense(Mat A)
927 {
928   PetscErrorCode ierr;
929 
930   PetscFunctionBegin;
931   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
932   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
933   if (!A->preallocated) {
934     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
935   }
936   PetscFunctionReturn(0);
937 }
938 
939 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
940 {
941   PetscErrorCode ierr;
942   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
943 
944   PetscFunctionBegin;
945   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
946   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
947   PetscFunctionReturn(0);
948 }
949 
950 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
951 {
952   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
953   PetscErrorCode ierr;
954 
955   PetscFunctionBegin;
956   ierr = MatConjugate(a->A);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 PetscErrorCode MatRealPart_MPIDense(Mat A)
961 {
962   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   ierr = MatRealPart(a->A);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
971 {
972   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
981 {
982   PetscErrorCode    ierr;
983   PetscScalar       *x;
984   const PetscScalar *a;
985   PetscInt          lda;
986 
987   PetscFunctionBegin;
988   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
989   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
990   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
991   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
992   ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
993   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
994   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
999 
1000 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1001 {
1002   PetscErrorCode ierr;
1003   PetscInt       i,n;
1004   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1005   PetscReal      *work;
1006 
1007   PetscFunctionBegin;
1008   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1009   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1010   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1011   if (type == NORM_2) {
1012     for (i=0; i<n; i++) work[i] *= work[i];
1013   }
1014   if (type == NORM_INFINITY) {
1015     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1016   } else {
1017     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1018   }
1019   ierr = PetscFree(work);CHKERRQ(ierr);
1020   if (type == NORM_2) {
1021     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1022   }
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1027 {
1028   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1029   PetscErrorCode ierr;
1030   PetscScalar    *a;
1031   PetscInt       m,n,i;
1032 
1033   PetscFunctionBegin;
1034   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1035   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1036   for (i=0; i<m*n; i++) {
1037     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1038   }
1039   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1040   PetscFunctionReturn(0);
1041 }
1042 
1043 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1044 
1045 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1046 {
1047   PetscFunctionBegin;
1048   *missing = PETSC_FALSE;
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1053 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1054 
1055 /* -------------------------------------------------------------------*/
1056 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1057                                         MatGetRow_MPIDense,
1058                                         MatRestoreRow_MPIDense,
1059                                         MatMult_MPIDense,
1060                                 /*  4*/ MatMultAdd_MPIDense,
1061                                         MatMultTranspose_MPIDense,
1062                                         MatMultTransposeAdd_MPIDense,
1063                                         0,
1064                                         0,
1065                                         0,
1066                                 /* 10*/ 0,
1067                                         0,
1068                                         0,
1069                                         0,
1070                                         MatTranspose_MPIDense,
1071                                 /* 15*/ MatGetInfo_MPIDense,
1072                                         MatEqual_MPIDense,
1073                                         MatGetDiagonal_MPIDense,
1074                                         MatDiagonalScale_MPIDense,
1075                                         MatNorm_MPIDense,
1076                                 /* 20*/ MatAssemblyBegin_MPIDense,
1077                                         MatAssemblyEnd_MPIDense,
1078                                         MatSetOption_MPIDense,
1079                                         MatZeroEntries_MPIDense,
1080                                 /* 24*/ MatZeroRows_MPIDense,
1081                                         0,
1082                                         0,
1083                                         0,
1084                                         0,
1085                                 /* 29*/ MatSetUp_MPIDense,
1086                                         0,
1087                                         0,
1088                                         MatGetDiagonalBlock_MPIDense,
1089                                         0,
1090                                 /* 34*/ MatDuplicate_MPIDense,
1091                                         0,
1092                                         0,
1093                                         0,
1094                                         0,
1095                                 /* 39*/ MatAXPY_MPIDense,
1096                                         MatCreateSubMatrices_MPIDense,
1097                                         0,
1098                                         MatGetValues_MPIDense,
1099                                         0,
1100                                 /* 44*/ 0,
1101                                         MatScale_MPIDense,
1102                                         MatShift_Basic,
1103                                         0,
1104                                         0,
1105                                 /* 49*/ MatSetRandom_MPIDense,
1106                                         0,
1107                                         0,
1108                                         0,
1109                                         0,
1110                                 /* 54*/ 0,
1111                                         0,
1112                                         0,
1113                                         0,
1114                                         0,
1115                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1116                                         MatDestroy_MPIDense,
1117                                         MatView_MPIDense,
1118                                         0,
1119                                         0,
1120                                 /* 64*/ 0,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                         0,
1125                                 /* 69*/ 0,
1126                                         0,
1127                                         0,
1128                                         0,
1129                                         0,
1130                                 /* 74*/ 0,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                         0,
1135                                 /* 79*/ 0,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                 /* 83*/ MatLoad_MPIDense,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                         0,
1144                                         0,
1145 #if defined(PETSC_HAVE_ELEMENTAL)
1146                                 /* 89*/ 0,
1147                                         0,
1148 #else
1149                                 /* 89*/ 0,
1150                                         0,
1151 #endif
1152                                         MatMatMultNumeric_MPIDense,
1153                                         0,
1154                                         0,
1155                                 /* 94*/ 0,
1156                                         0,
1157                                         0,
1158                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1159                                         0,
1160                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1161                                         0,
1162                                         0,
1163                                         MatConjugate_MPIDense,
1164                                         0,
1165                                 /*104*/ 0,
1166                                         MatRealPart_MPIDense,
1167                                         MatImaginaryPart_MPIDense,
1168                                         0,
1169                                         0,
1170                                 /*109*/ 0,
1171                                         0,
1172                                         0,
1173                                         MatGetColumnVector_MPIDense,
1174                                         MatMissingDiagonal_MPIDense,
1175                                 /*114*/ 0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                 /*119*/ 0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                         0,
1185                                 /*124*/ 0,
1186                                         MatGetColumnNorms_MPIDense,
1187                                         0,
1188                                         0,
1189                                         0,
1190                                 /*129*/ 0,
1191                                         0,
1192                                         0,
1193                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1194                                         0,
1195                                 /*134*/ 0,
1196                                         0,
1197                                         0,
1198                                         0,
1199                                         0,
1200                                 /*139*/ 0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                         0,
1205                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1206                                 /*145*/ 0,
1207                                         0,
1208                                         0
1209 };
1210 
1211 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1212 {
1213   Mat_MPIDense   *a;
1214   PetscErrorCode ierr;
1215 
1216   PetscFunctionBegin;
1217   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1218   mat->preallocated = PETSC_TRUE;
1219   /* Note:  For now, when data is specified above, this assumes the user correctly
1220    allocates the local dense storage space.  We should add error checking. */
1221 
1222   a       = (Mat_MPIDense*)mat->data;
1223   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1224   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1225   a->nvec = mat->cmap->n;
1226 
1227   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1228   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1229   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1230   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1231   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #if defined(PETSC_HAVE_ELEMENTAL)
1236 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1237 {
1238   Mat            mat_elemental;
1239   PetscErrorCode ierr;
1240   PetscScalar    *v;
1241   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1242 
1243   PetscFunctionBegin;
1244   if (reuse == MAT_REUSE_MATRIX) {
1245     mat_elemental = *newmat;
1246     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1247   } else {
1248     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1249     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1250     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1251     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1252     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1253   }
1254 
1255   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1256   for (i=0; i<N; i++) cols[i] = i;
1257   for (i=0; i<m; i++) rows[i] = rstart + i;
1258 
1259   /* PETSc-Elemental interaface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1260   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1261   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1262   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1263   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1264   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1265   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1266 
1267   if (reuse == MAT_INPLACE_MATRIX) {
1268     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1269   } else {
1270     *newmat = mat_elemental;
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 #endif
1275 
1276 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1277 {
1278   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1287 {
1288   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1289   PetscErrorCode ierr;
1290 
1291   PetscFunctionBegin;
1292   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1297 {
1298   PetscErrorCode ierr;
1299   Mat_MPIDense   *mat;
1300   PetscInt       m,nloc,N;
1301 
1302   PetscFunctionBegin;
1303   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1304   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1305   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1306     PetscInt sum;
1307 
1308     if (n == PETSC_DECIDE) {
1309       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1310     }
1311     /* Check sum(n) = N */
1312     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1313     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1314 
1315     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1316   }
1317 
1318   /* numeric phase */
1319   mat = (Mat_MPIDense*)(*outmat)->data;
1320   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1321   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1322   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1327 {
1328   Mat_MPIDense   *a;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1333   mat->data = (void*)a;
1334   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1335 
1336   mat->insertmode = NOT_SET_VALUES;
1337   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1338   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1339 
1340   /* build cache for off array entries formed */
1341   a->donotstash = PETSC_FALSE;
1342 
1343   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1344 
1345   /* stuff used for matrix vector multiply */
1346   a->lvec        = 0;
1347   a->Mvctx       = 0;
1348   a->roworiented = PETSC_TRUE;
1349 
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1353   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1355   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1357 #if defined(PETSC_HAVE_ELEMENTAL)
1358   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1359 #endif
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1363   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1364   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1365   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
1366   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
1367 
1368   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1369   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1370   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1371   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1372   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 /*MC
1377    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1378 
1379    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1380    and MATMPIDENSE otherwise.
1381 
1382    Options Database Keys:
1383 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1384 
1385   Level: beginner
1386 
1387 
1388 .seealso: MATSEQDENSE,MATMPIDENSE
1389 M*/
1390 
1391 /*@C
1392    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1393 
1394    Not collective
1395 
1396    Input Parameters:
1397 .  B - the matrix
1398 -  data - optional location of matrix data.  Set data=NULL for PETSc
1399    to control all matrix memory allocation.
1400 
1401    Notes:
1402    The dense format is fully compatible with standard Fortran 77
1403    storage by columns.
1404 
1405    The data input variable is intended primarily for Fortran programmers
1406    who wish to allocate their own matrix memory space.  Most users should
1407    set data=NULL.
1408 
1409    Level: intermediate
1410 
1411 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1412 @*/
1413 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1414 {
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 /*@
1423    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1424    array provided by the user. This is useful to avoid copying an array
1425    into a matrix
1426 
1427    Not Collective
1428 
1429    Input Parameters:
1430 +  mat - the matrix
1431 -  array - the array in column major order
1432 
1433    Notes:
1434    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1435    freed when the matrix is destroyed.
1436 
1437    Level: developer
1438 
1439 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1440 
1441 @*/
1442 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1443 {
1444   PetscErrorCode ierr;
1445   PetscFunctionBegin;
1446   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1447   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 /*@
1452    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1453 
1454    Not Collective
1455 
1456    Input Parameters:
1457 .  mat - the matrix
1458 
1459    Notes:
1460    You can only call this after a call to MatDensePlaceArray()
1461 
1462    Level: developer
1463 
1464 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1465 
1466 @*/
1467 PetscErrorCode  MatDenseResetArray(Mat mat)
1468 {
1469   PetscErrorCode ierr;
1470   PetscFunctionBegin;
1471   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1472   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 /*@C
1477    MatCreateDense - Creates a parallel matrix in dense format.
1478 
1479    Collective
1480 
1481    Input Parameters:
1482 +  comm - MPI communicator
1483 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1484 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1485 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1486 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1487 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1488    to control all matrix memory allocation.
1489 
1490    Output Parameter:
1491 .  A - the matrix
1492 
1493    Notes:
1494    The dense format is fully compatible with standard Fortran 77
1495    storage by columns.
1496 
1497    The data input variable is intended primarily for Fortran programmers
1498    who wish to allocate their own matrix memory space.  Most users should
1499    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1500 
1501    The user MUST specify either the local or global matrix dimensions
1502    (possibly both).
1503 
1504    Level: intermediate
1505 
1506 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1507 @*/
1508 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1509 {
1510   PetscErrorCode ierr;
1511   PetscMPIInt    size;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1515   PetscValidLogicalCollectiveBool(*A,!!data,6);
1516   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1518   if (size > 1) {
1519     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1520     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1521     if (data) {  /* user provided data array, so no need to assemble */
1522       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1523       (*A)->assembled = PETSC_TRUE;
1524     }
1525   } else {
1526     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1527     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1533 {
1534   Mat            mat;
1535   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   *newmat = 0;
1540   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1541   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1542   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1543   a       = (Mat_MPIDense*)mat->data;
1544 
1545   mat->factortype   = A->factortype;
1546   mat->assembled    = PETSC_TRUE;
1547   mat->preallocated = PETSC_TRUE;
1548 
1549   a->size         = oldmat->size;
1550   a->rank         = oldmat->rank;
1551   mat->insertmode = NOT_SET_VALUES;
1552   a->nvec         = oldmat->nvec;
1553   a->donotstash   = oldmat->donotstash;
1554 
1555   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1556   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1557 
1558   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1559   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1560   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1561 
1562   *newmat = mat;
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1567 {
1568   PetscErrorCode ierr;
1569   PetscBool      isbinary;
1570 #if defined(PETSC_HAVE_HDF5)
1571   PetscBool      ishdf5;
1572 #endif
1573 
1574   PetscFunctionBegin;
1575   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
1576   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
1577   /* force binary viewer to load .info file if it has not yet done so */
1578   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1579   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1580 #if defined(PETSC_HAVE_HDF5)
1581   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
1582 #endif
1583   if (isbinary) {
1584     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
1585 #if defined(PETSC_HAVE_HDF5)
1586   } else if (ishdf5) {
1587     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
1588 #endif
1589   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1594 {
1595   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1596   Mat            a,b;
1597   PetscBool      flg;
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   a    = matA->A;
1602   b    = matB->A;
1603   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1604   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1609 {
1610   PetscErrorCode        ierr;
1611   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1612   Mat_TransMatMultDense *atb = a->atbdense;
1613 
1614   PetscFunctionBegin;
1615   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1616   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1617   ierr = PetscFree(atb);CHKERRQ(ierr);
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1622 {
1623   PetscErrorCode        ierr;
1624   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1625   Mat_MatTransMultDense *abt = a->abtdense;
1626 
1627   PetscFunctionBegin;
1628   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1629   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1630   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1631   ierr = PetscFree(abt);CHKERRQ(ierr);
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1636 {
1637   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1638   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1639   Mat_TransMatMultDense *atb = c->atbdense;
1640   PetscErrorCode ierr;
1641   MPI_Comm       comm;
1642   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1643   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1644   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1645   PetscScalar    _DOne=1.0,_DZero=0.0;
1646   PetscBLASInt   am,an,bn,aN;
1647   const PetscInt *ranges;
1648 
1649   PetscFunctionBegin;
1650   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1651   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1652   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1653 
1654   /* compute atbarray = aseq^T * bseq */
1655   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1656   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1657   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1658   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1659   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1660 
1661   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1662   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1663 
1664   /* arrange atbarray into sendbuf */
1665   k = 0;
1666   for (proc=0; proc<size; proc++) {
1667     for (j=0; j<cN; j++) {
1668       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1669     }
1670   }
1671   /* sum all atbarray to local values of C */
1672   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1673   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1674   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1679 {
1680   PetscErrorCode        ierr;
1681   MPI_Comm              comm;
1682   PetscMPIInt           size;
1683   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1684   Mat_MPIDense          *c;
1685   Mat_TransMatMultDense *atb;
1686 
1687   PetscFunctionBegin;
1688   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1689   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1690     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1691   }
1692 
1693   /* create matrix product C */
1694   ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1695   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
1696   ierr = MatSetUp(C);CHKERRQ(ierr);
1697   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699 
1700   /* create data structure for reuse C */
1701   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1702   ierr = PetscNew(&atb);CHKERRQ(ierr);
1703   cM = C->rmap->N;
1704   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1705 
1706   c                    = (Mat_MPIDense*)C->data;
1707   c->atbdense          = atb;
1708   atb->destroy         = C->ops->destroy;
1709   C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1714 {
1715   PetscErrorCode        ierr;
1716   MPI_Comm              comm;
1717   PetscMPIInt           i, size;
1718   PetscInt              maxRows, bufsiz;
1719   Mat_MPIDense          *c;
1720   PetscMPIInt           tag;
1721   PetscInt              alg;
1722   Mat_MatTransMultDense *abt;
1723   Mat_Product           *product = C->product;
1724   PetscBool             flg;
1725 
1726   PetscFunctionBegin;
1727   /* check local size of A and B */
1728   if (A->cmap->n != B->cmap->n) {
1729     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
1730   }
1731 
1732   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
1733   if (flg) {
1734     alg = 0;
1735   } else alg = 1;
1736 
1737   /* setup matrix product C */
1738   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
1739   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
1740   ierr = MatSetUp(C);CHKERRQ(ierr);
1741   ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
1742   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1743   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1744 
1745   /* create data structure for reuse C */
1746   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
1747   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1748   ierr = PetscNew(&abt);CHKERRQ(ierr);
1749   abt->tag = tag;
1750   abt->alg = alg;
1751   switch (alg) {
1752   case 1: /* alg: "cyclic" */
1753     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
1754     bufsiz = A->cmap->N * maxRows;
1755     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
1756     break;
1757   default: /* alg: "allgatherv" */
1758     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
1759     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
1760     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
1761     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
1762     break;
1763   }
1764 
1765   c                               = (Mat_MPIDense*)C->data;
1766   c->abtdense                     = abt;
1767   abt->destroy                    = C->ops->destroy;
1768   C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
1769   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
1774 {
1775   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1776   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1777   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1778   Mat_MatTransMultDense *abt = c->abtdense;
1779   PetscErrorCode ierr;
1780   MPI_Comm       comm;
1781   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
1782   PetscScalar    *sendbuf, *recvbuf=0, *carray;
1783   PetscInt       i,cK=A->cmap->N,k,j,bn;
1784   PetscScalar    _DOne=1.0,_DZero=0.0;
1785   PetscBLASInt   cm, cn, ck;
1786   MPI_Request    reqs[2];
1787   const PetscInt *ranges;
1788 
1789   PetscFunctionBegin;
1790   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1791   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1792   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1793 
1794   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
1795   bn = B->rmap->n;
1796   if (bseq->lda == bn) {
1797     sendbuf = bseq->v;
1798   } else {
1799     sendbuf = abt->buf[0];
1800     for (k = 0, i = 0; i < cK; i++) {
1801       for (j = 0; j < bn; j++, k++) {
1802         sendbuf[k] = bseq->v[i * bseq->lda + j];
1803       }
1804     }
1805   }
1806   if (size > 1) {
1807     sendto = (rank + size - 1) % size;
1808     recvfrom = (rank + size + 1) % size;
1809   } else {
1810     sendto = recvfrom = 0;
1811   }
1812   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
1813   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
1814   recvisfrom = rank;
1815   for (i = 0; i < size; i++) {
1816     /* we have finished receiving in sending, bufs can be read/modified */
1817     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
1818     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
1819 
1820     if (nextrecvisfrom != rank) {
1821       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
1822       sendsiz = cK * bn;
1823       recvsiz = cK * nextbn;
1824       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
1825       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
1826       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
1827     }
1828 
1829     /* local aseq * sendbuf^T */
1830     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
1831     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
1832     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
1833 
1834     if (nextrecvisfrom != rank) {
1835       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
1836       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
1837     }
1838     bn = nextbn;
1839     recvisfrom = nextrecvisfrom;
1840     sendbuf = recvbuf;
1841   }
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
1846 {
1847   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1848   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1849   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1850   Mat_MatTransMultDense *abt = c->abtdense;
1851   PetscErrorCode ierr;
1852   MPI_Comm       comm;
1853   PetscMPIInt    rank,size;
1854   PetscScalar    *sendbuf, *recvbuf;
1855   PetscInt       i,cK=A->cmap->N,k,j,bn;
1856   PetscScalar    _DOne=1.0,_DZero=0.0;
1857   PetscBLASInt   cm, cn, ck;
1858 
1859   PetscFunctionBegin;
1860   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1861   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1862   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1863 
1864   /* copy transpose of B into buf[0] */
1865   bn      = B->rmap->n;
1866   sendbuf = abt->buf[0];
1867   recvbuf = abt->buf[1];
1868   for (k = 0, j = 0; j < bn; j++) {
1869     for (i = 0; i < cK; i++, k++) {
1870       sendbuf[k] = bseq->v[i * bseq->lda + j];
1871     }
1872   }
1873   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
1874   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
1875   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
1876   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
1877   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1882 {
1883   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
1884   Mat_MatTransMultDense *abt = c->abtdense;
1885   PetscErrorCode        ierr;
1886 
1887   PetscFunctionBegin;
1888   switch (abt->alg) {
1889   case 1:
1890     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
1891     break;
1892   default:
1893     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
1894     break;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1900 {
1901   PetscErrorCode   ierr;
1902   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1903   Mat_MatMultDense *ab = a->abdense;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1907   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1908   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1909 
1910   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1911   ierr = PetscFree(ab);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #if defined(PETSC_HAVE_ELEMENTAL)
1916 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1917 {
1918   PetscErrorCode   ierr;
1919   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1920   Mat_MatMultDense *ab=c->abdense;
1921 
1922   PetscFunctionBegin;
1923   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1924   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1925   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1926   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1931 {
1932   PetscErrorCode   ierr;
1933   Mat              Ae,Be,Ce;
1934   Mat_MPIDense     *c;
1935   Mat_MatMultDense *ab;
1936 
1937   PetscFunctionBegin;
1938   /* check local size of A and B */
1939   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1940     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1941   }
1942 
1943   /* create elemental matrices Ae and Be */
1944   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
1945   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1946   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
1947   ierr = MatSetUp(Ae);CHKERRQ(ierr);
1948   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1949 
1950   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
1951   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
1952   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
1953   ierr = MatSetUp(Be);CHKERRQ(ierr);
1954   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1955 
1956   /* compute symbolic Ce = Ae*Be */
1957   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
1958   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
1959 
1960   /* setup C */
1961   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1962   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
1963   ierr = MatSetUp(C);CHKERRQ(ierr);
1964 
1965   /* create data structure for reuse Cdense */
1966   ierr = PetscNew(&ab);CHKERRQ(ierr);
1967   c                  = (Mat_MPIDense*)C->data;
1968   c->abdense         = ab;
1969 
1970   ab->Ae             = Ae;
1971   ab->Be             = Be;
1972   ab->Ce             = Ce;
1973   ab->destroy        = C->ops->destroy;
1974   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1975   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1976   C->ops->productnumeric = MatProductNumeric_AB;
1977   PetscFunctionReturn(0);
1978 }
1979 #endif
1980 /* ----------------------------------------------- */
1981 #if defined(PETSC_HAVE_ELEMENTAL)
1982 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
1983 {
1984   PetscFunctionBegin;
1985   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
1986   C->ops->productsymbolic = MatProductSymbolic_AB;
1987   C->ops->productnumeric  = MatProductNumeric_AB;
1988   PetscFunctionReturn(0);
1989 }
1990 #endif
1991 
1992 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
1993 {
1994   PetscErrorCode ierr;
1995   Mat_Product    *product = C->product;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
1999   C->ops->productnumeric          = MatProductNumeric_AtB;
2000   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2005 {
2006   Mat_Product *product = C->product;
2007   Mat         A = product->A,B=product->B;
2008 
2009   PetscFunctionBegin;
2010   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2011     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2012 
2013   C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2018 {
2019   PetscErrorCode ierr;
2020   Mat_Product    *product = C->product;
2021   const char     *algTypes[2] = {"allgatherv","cyclic"};
2022   PetscInt       alg,nalg = 2;
2023   PetscBool      flg = PETSC_FALSE;
2024 
2025   PetscFunctionBegin;
2026   /* Set default algorithm */
2027   alg = 0; /* default is allgatherv */
2028   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2029   if (flg) {
2030     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2031   }
2032 
2033   /* Get runtime option */
2034   if (product->api_user) {
2035     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2036     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2037     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2038   } else {
2039     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2040     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2041     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2042   }
2043   if (flg) {
2044     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2045   }
2046 
2047   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2048   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2049   C->ops->productnumeric           = MatProductNumeric_ABt;
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2054 {
2055   PetscErrorCode ierr;
2056   Mat_Product    *product = C->product;
2057 
2058   PetscFunctionBegin;
2059   switch (product->type) {
2060 #if defined(PETSC_HAVE_ELEMENTAL)
2061   case MATPRODUCT_AB:
2062     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2063     break;
2064 #endif
2065   case MATPRODUCT_AtB:
2066     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2067     break;
2068   case MATPRODUCT_ABt:
2069     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2070     break;
2071   default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075