xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 50c5671dffa46513473c2503d649ca60c7ec5643)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11    MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix
12    that stores this process's rows; for a MATSEQDENSE matrix, returns the matrix itself.
13 
14    Not Collective
15 
16    Input Parameter:
17 .  A - the MATMPIDENSE or MATSEQDENSE matrix
18 
19    Output Parameter:
20 .  B - the inner matrix
21 
22    Level: intermediate
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
32   if (flg) *B = mat->A;
33   else *B = A;
34   PetscFunctionReturn(0);
35 }
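/*
   Illustrative usage sketch (not part of the original source; error checking abbreviated;
   M and N are global sizes chosen by the caller):

     Mat            A,Aloc;
     PetscInt       mloc,nloc;
     PetscErrorCode ierr;
     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
     ierr = MatGetSize(Aloc,&mloc,&nloc);CHKERRQ(ierr);

   Here Aloc is the MATSEQDENSE block storing this rank's rows (all N columns), so
   mloc equals the local row count of A and nloc equals N.
*/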
36 
37 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
38 {
39   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
40   PetscErrorCode ierr;
41   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
42 
43   PetscFunctionBegin;
44   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
45   lrow = row - rstart;
46   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
51 {
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
56   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
57   PetscFunctionReturn(0);
58 }
59 
60 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
61 {
62   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
63   PetscErrorCode ierr;
64   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
65   PetscScalar    *array;
66   MPI_Comm       comm;
67   Mat            B;
68 
69   PetscFunctionBegin;
70   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
71 
72   ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
73   if (!B) {
74     ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
75     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
76     ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
77     ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
78     ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
79     ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
80     ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
81     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
82     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
83     ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
84     *a   = B;
85     ierr = MatDestroy(&B);CHKERRQ(ierr);
86   } else *a = B;
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
91 {
92   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
93   PetscErrorCode ierr;
94   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
95   PetscBool      roworiented = A->roworiented;
96 
97   PetscFunctionBegin;
98   for (i=0; i<m; i++) {
99     if (idxm[i] < 0) continue;
100     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
101     if (idxm[i] >= rstart && idxm[i] < rend) {
102       row = idxm[i] - rstart;
103       if (roworiented) {
104         ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
105       } else {
106         for (j=0; j<n; j++) {
107           if (idxn[j] < 0) continue;
108           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
109           ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
110         }
111       }
112     } else if (!A->donotstash) {
113       mat->assembled = PETSC_FALSE;
114       if (roworiented) {
115         ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
116       } else {
117         ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
118       }
119     }
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
125 {
126   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
127   PetscErrorCode ierr;
128   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
129 
130   PetscFunctionBegin;
131   for (i=0; i<m; i++) {
132     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
133     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
134     if (idxm[i] >= rstart && idxm[i] < rend) {
135       row = idxm[i] - rstart;
136       for (j=0; j<n; j++) {
137         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
138         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
139         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
140       }
141     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
142   }
143   PetscFunctionReturn(0);
144 }
145 
146 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
147 {
148   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
149   PetscErrorCode ierr;
150 
151   PetscFunctionBegin;
152   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
157 {
158   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
159   PetscErrorCode ierr;
160 
161   PetscFunctionBegin;
162   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
167 {
168   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
169   PetscErrorCode ierr;
170 
171   PetscFunctionBegin;
172   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
173   PetscFunctionReturn(0);
174 }
175 
176 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
177 {
178   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
179   PetscErrorCode ierr;
180 
181   PetscFunctionBegin;
182   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
183   PetscFunctionReturn(0);
184 }
185 
186 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
187 {
188   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
189   PetscErrorCode ierr;
190 
191   PetscFunctionBegin;
192   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
193   PetscFunctionReturn(0);
194 }
195 
196 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
197 {
198   Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
199   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
200   PetscErrorCode ierr;
201   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
202   const PetscInt *irow,*icol;
203   PetscScalar    *av,*bv,*v = lmat->v;
204   Mat            newmat;
205   IS             iscol_local;
206   MPI_Comm       comm_is,comm_mat;
207 
208   PetscFunctionBegin;
209   ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
210   ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
211   if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
212 
213   ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
214   ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
215   ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
216   ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
217   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
218   ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */
219 
220   /* No parallel redistribution is currently supported! We should really check each index set
221      to confirm that it is OK. Currently only submatrices with the same row partitioning as the
222      original matrix are supported! */
223 
224   ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
225   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
226 
227   /* Check submatrix call */
228   if (scall == MAT_REUSE_MATRIX) {
229     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
230     /* Really need to test rows and column sizes! */
231     newmat = *B;
232   } else {
233     /* Create and fill new matrix */
234     ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
235     ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
236     ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
237     ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
238   }
239 
240   /* Now extract the data pointers and do the copy, column at a time */
241   newmatd = (Mat_MPIDense*)newmat->data;
242   bv      = ((Mat_SeqDense*)newmatd->A->data)->v;
243 
244   for (i=0; i<Ncols; i++) {
245     av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
246     for (j=0; j<nrows; j++) {
247       *bv++ = av[irow[j] - rstart];
248     }
249   }
250 
251   /* Assemble the matrices so that the correct flags are set */
252   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
253   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
254 
255   /* Free work space */
256   ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
257   ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
258   ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
259   *B   = newmat;
260   PetscFunctionReturn(0);
261 }
262 
263 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
264 {
265   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
266   PetscErrorCode ierr;
267 
268   PetscFunctionBegin;
269   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
270   PetscFunctionReturn(0);
271 }
272 
273 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
274 {
275   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
276   PetscErrorCode ierr;
277 
278   PetscFunctionBegin;
279   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
280   PetscFunctionReturn(0);
281 }
282 
283 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
284 {
285   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
286   PetscErrorCode ierr;
287   PetscInt       nstash,reallocs;
288 
289   PetscFunctionBegin;
290   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
291 
292   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
293   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
294   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
295   PetscFunctionReturn(0);
296 }
297 
298 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
299 {
300   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
301   PetscErrorCode ierr;
302   PetscInt       i,*row,*col,flg,j,rstart,ncols;
303   PetscMPIInt    n;
304   PetscScalar    *val;
305 
306   PetscFunctionBegin;
307   if (!mdn->donotstash && !mat->nooffprocentries) {
308     /*  wait on receives */
309     while (1) {
310       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
311       if (!flg) break;
312 
313       for (i=0; i<n;) {
314         /* Now identify the consecutive vals belonging to the same row */
315         for (j=i,rstart=row[j]; j<n; j++) {
316           if (row[j] != rstart) break;
317         }
318         if (j < n) ncols = j-i;
319         else       ncols = n-i;
320         /* Now assemble all these values with a single function call */
321         ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
322         i    = j;
323       }
324     }
325     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
326   }
327 
328   ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
329   ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);
330 
331   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
332     ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
333   }
334   PetscFunctionReturn(0);
335 }
336 
337 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
338 {
339   PetscErrorCode ierr;
340   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
341 
342   PetscFunctionBegin;
343   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 /* The code does not handle the diagonal entries correctly unless the
348    matrix is square and the column and row ownerships are identical.
349    This is a BUG. The only way to fix it seems to be to access
350    the local matrix mdn->A directly rather than going through the
351    MatZeroRows() routine.
352 */
353 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
354 {
355   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
356   PetscErrorCode    ierr;
357   PetscInt          i,*owners = A->rmap->range;
358   PetscInt          *sizes,j,idx,nsends;
359   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
360   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
361   PetscInt          *lens,*lrows,*values;
362   PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
363   MPI_Comm          comm;
364   MPI_Request       *send_waits,*recv_waits;
365   MPI_Status        recv_status,*send_status;
366   PetscBool         found;
367   const PetscScalar *xx;
368   PetscScalar       *bb;
369 
370   PetscFunctionBegin;
371   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
372   if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
373   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
374   /*  first count number of contributors to each processor */
375   ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
376   ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
377   for (i=0; i<N; i++) {
378     idx   = rows[i];
379     found = PETSC_FALSE;
380     for (j=0; j<size; j++) {
381       if (idx >= owners[j] && idx < owners[j+1]) {
382         sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
383       }
384     }
385     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
386   }
387   nsends = 0;
388   for (i=0; i<size; i++) nsends += sizes[2*i+1];
389 
390   /* inform other processors of number of messages and max length*/
391   ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);
392 
393   /* post receives:   */
394   ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
395   ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
396   for (i=0; i<nrecvs; i++) {
397     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
398   }
399 
400   /* do sends:
401       1) starts[i] gives the starting index in svalues for stuff going to
402          the ith processor
403   */
404   ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
405   ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
406   ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);
407 
408   starts[0] = 0;
409   for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
410   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
411 
412   starts[0] = 0;
413   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
414   count = 0;
415   for (i=0; i<size; i++) {
416     if (sizes[2*i+1]) {
417       ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
418     }
419   }
420   ierr = PetscFree(starts);CHKERRQ(ierr);
421 
422   base = owners[rank];
423 
424   /*  wait on receives */
425   ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
426   count = nrecvs;
427   slen  = 0;
428   while (count) {
429     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
430     /* unpack receives into our local space */
431     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
432 
433     source[imdex] = recv_status.MPI_SOURCE;
434     lens[imdex]   = n;
435     slen += n;
436     count--;
437   }
438   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
439 
440   /* move the data into the send scatter */
441   ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
442   count = 0;
443   for (i=0; i<nrecvs; i++) {
444     values = rvalues + i*nmax;
445     for (j=0; j<lens[i]; j++) {
446       lrows[count++] = values[j] - base;
447     }
448   }
449   ierr = PetscFree(rvalues);CHKERRQ(ierr);
450   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
451   ierr = PetscFree(owner);CHKERRQ(ierr);
452   ierr = PetscFree(sizes);CHKERRQ(ierr);
453 
454   /* fix right hand side if needed */
455   if (x && b) {
456     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
457     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
458     for (i=0; i<slen; i++) {
459       bb[lrows[i]] = diag*xx[lrows[i]];
460     }
461     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
462     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
463   }
464 
465   /* actually zap the local rows */
466   ierr = MatZeroRows(l->A,slen,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
467   if (diag != 0.0) {
468     Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
469     PetscInt     m   = ll->lda, i;
470 
471     for (i=0; i<slen; i++) {
472       ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
473     }
474   }
475   ierr = PetscFree(lrows);CHKERRQ(ierr);
476 
477   /* wait on sends */
478   if (nsends) {
479     ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
480     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
481     ierr = PetscFree(send_status);CHKERRQ(ierr);
482   }
483   ierr = PetscFree(send_waits);CHKERRQ(ierr);
484   ierr = PetscFree(svalues);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
489 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
490 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
491 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
492 
493 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
494 {
495   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
496   PetscErrorCode ierr;
497 
498   PetscFunctionBegin;
499   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
500   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
501   ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
506 {
507   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
508   PetscErrorCode ierr;
509 
510   PetscFunctionBegin;
511   ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
512   ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
513   ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
517 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
518 {
519   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
520   PetscErrorCode ierr;
521   PetscScalar    zero = 0.0;
522 
523   PetscFunctionBegin;
524   ierr = VecSet(yy,zero);CHKERRQ(ierr);
525   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
526   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
527   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
528   PetscFunctionReturn(0);
529 }
530 
531 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
532 {
533   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
534   PetscErrorCode ierr;
535 
536   PetscFunctionBegin;
537   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
538   ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
539   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
540   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
541   PetscFunctionReturn(0);
542 }
543 
544 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
545 {
546   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
547   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
548   PetscErrorCode ierr;
549   PetscInt       len,i,n,m = A->rmap->n,radd;
550   PetscScalar    *x,zero = 0.0;
551 
552   PetscFunctionBegin;
553   ierr = VecSet(v,zero);CHKERRQ(ierr);
554   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
555   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
556   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
557   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
558   radd = A->rmap->rstart*m;
559   for (i=0; i<len; i++) {
560     x[i] = aloc->v[radd + i*m + i];
561   }
562   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 PetscErrorCode MatDestroy_MPIDense(Mat mat)
567 {
568   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
569   PetscErrorCode ierr;
570 
571   PetscFunctionBegin;
572 #if defined(PETSC_USE_LOG)
573   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
574 #endif
575   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
576   ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
577   ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
578   ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);
579 
580   ierr = PetscFree(mat->data);CHKERRQ(ierr);
581   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
582 
583   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
584   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
585   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
586   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
587   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
588   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
589   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
590 #if defined(PETSC_HAVE_ELEMENTAL)
591   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
592 #endif
593   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
594   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
595   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
596   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
597   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
598   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
599   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
600   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
601   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
602   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
603   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
604   PetscFunctionReturn(0);
605 }
606 
607 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
608 
609 #include <petscdraw.h>
610 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
611 {
612   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
613   PetscErrorCode    ierr;
614   PetscMPIInt       rank = mdn->rank;
615   PetscViewerType   vtype;
616   PetscBool         iascii,isdraw;
617   PetscViewer       sviewer;
618   PetscViewerFormat format;
619 
620   PetscFunctionBegin;
621   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
622   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
623   if (iascii) {
624     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
625     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
626     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
627       MatInfo info;
628       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
633       ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_INFO) {
636       PetscFunctionReturn(0);
637     }
638   } else if (isdraw) {
639     PetscDraw draw;
640     PetscBool isnull;
641 
642     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
643     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
644     if (isnull) PetscFunctionReturn(0);
645   }
646 
647   {
648     /* Assemble the entire matrix onto the first processor. */
649     Mat         A;
650     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
651     PetscInt    *cols;
652     PetscScalar *vals;
653 
654     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
655     if (!rank) {
656       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
657     } else {
658       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
659     }
660     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
661     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
662     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
663     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
664 
665     /* Copy the matrix ... This isn't the most efficient means,
666        but it's quick for now */
667     A->insertmode = INSERT_VALUES;
668 
669     row = mat->rmap->rstart;
670     m   = mdn->A->rmap->n;
671     for (i=0; i<m; i++) {
672       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
673       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
674       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
675       row++;
676     }
677 
678     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
679     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
680     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
681     if (!rank) {
682       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
683       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
684     }
685     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
686     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
687     ierr = MatDestroy(&A);CHKERRQ(ierr);
688   }
689   PetscFunctionReturn(0);
690 }
691 
692 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
693 {
694   PetscErrorCode ierr;
695   PetscBool      iascii,isbinary,isdraw,issocket;
696 
697   PetscFunctionBegin;
698   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
699   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
700   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
701   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
702 
703   if (iascii || issocket || isdraw) {
704     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
705   } else if (isbinary) {
706     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
712 {
713   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
714   Mat            mdn  = mat->A;
715   PetscErrorCode ierr;
716   PetscLogDouble isend[5],irecv[5];
717 
718   PetscFunctionBegin;
719   info->block_size = 1.0;
720 
721   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
722 
723   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
724   isend[3] = info->memory;  isend[4] = info->mallocs;
725   if (flag == MAT_LOCAL) {
726     info->nz_used      = isend[0];
727     info->nz_allocated = isend[1];
728     info->nz_unneeded  = isend[2];
729     info->memory       = isend[3];
730     info->mallocs      = isend[4];
731   } else if (flag == MAT_GLOBAL_MAX) {
732     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
733 
734     info->nz_used      = irecv[0];
735     info->nz_allocated = irecv[1];
736     info->nz_unneeded  = irecv[2];
737     info->memory       = irecv[3];
738     info->mallocs      = irecv[4];
739   } else if (flag == MAT_GLOBAL_SUM) {
740     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
741 
742     info->nz_used      = irecv[0];
743     info->nz_allocated = irecv[1];
744     info->nz_unneeded  = irecv[2];
745     info->memory       = irecv[3];
746     info->mallocs      = irecv[4];
747   }
748   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
749   info->fill_ratio_needed = 0;
750   info->factor_mallocs    = 0;
751   PetscFunctionReturn(0);
752 }
753 
754 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
755 {
756   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
757   PetscErrorCode ierr;
758 
759   PetscFunctionBegin;
760   switch (op) {
761   case MAT_NEW_NONZERO_LOCATIONS:
762   case MAT_NEW_NONZERO_LOCATION_ERR:
763   case MAT_NEW_NONZERO_ALLOCATION_ERR:
764     MatCheckPreallocated(A,1);
765     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
766     break;
767   case MAT_ROW_ORIENTED:
768     MatCheckPreallocated(A,1);
769     a->roworiented = flg;
770     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
771     break;
772   case MAT_NEW_DIAGONALS:
773   case MAT_KEEP_NONZERO_PATTERN:
774   case MAT_USE_HASH_TABLE:
775   case MAT_SORTED_FULL:
776     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
777     break;
778   case MAT_IGNORE_OFF_PROC_ENTRIES:
779     a->donotstash = flg;
780     break;
781   case MAT_SYMMETRIC:
782   case MAT_STRUCTURALLY_SYMMETRIC:
783   case MAT_HERMITIAN:
784   case MAT_SYMMETRY_ETERNAL:
785   case MAT_IGNORE_LOWER_TRIANGULAR:
786   case MAT_IGNORE_ZERO_ENTRIES:
787     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
788     break;
789   default:
790     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
791   }
792   PetscFunctionReturn(0);
793 }
794 
795 
796 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
797 {
798   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
799   Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
800   const PetscScalar *l,*r;
801   PetscScalar       x,*v;
802   PetscErrorCode    ierr;
803   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
804 
805   PetscFunctionBegin;
806   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
807   if (ll) {
808     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
809     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
810     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
811     for (i=0; i<m; i++) {
812       x = l[i];
813       v = mat->v + i;
814       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
815     }
816     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
817     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
818   }
819   if (rr) {
820     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
821     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
822     ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823     ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824     ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
825     for (i=0; i<n; i++) {
826       x = r[i];
827       v = mat->v + i*m;
828       for (j=0; j<m; j++) (*v++) *= x;
829     }
830     ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
831     ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
837 {
838   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
839   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
840   PetscErrorCode ierr;
841   PetscInt       i,j;
842   PetscReal      sum = 0.0;
843   PetscScalar    *v  = mat->v;
844 
845   PetscFunctionBegin;
846   if (mdn->size == 1) {
847     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
848   } else {
849     if (type == NORM_FROBENIUS) {
850       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
851         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
852       }
853       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
854       *nrm = PetscSqrtReal(*nrm);
855       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
856     } else if (type == NORM_1) {
857       PetscReal *tmp,*tmp2;
858       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
859       *nrm = 0.0;
860       v    = mat->v;
861       for (j=0; j<mdn->A->cmap->n; j++) {
862         for (i=0; i<mdn->A->rmap->n; i++) {
863           tmp[j] += PetscAbsScalar(*v);  v++;
864         }
865       }
866       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
867       for (j=0; j<A->cmap->N; j++) {
868         if (tmp2[j] > *nrm) *nrm = tmp2[j];
869       }
870       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
871       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
872     } else if (type == NORM_INFINITY) { /* max row norm */
873       PetscReal ntemp;
874       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
875       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
876     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
877   }
878   PetscFunctionReturn(0);
879 }
880 
881 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
882 {
883   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
884   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
885   Mat            B;
886   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
887   PetscErrorCode ierr;
888   PetscInt       j,i;
889   PetscScalar    *v;
890 
891   PetscFunctionBegin;
892   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
893     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
894     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
895     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
896     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
897   } else {
898     B = *matout;
899   }
900 
901   m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
902   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
903   for (i=0; i<m; i++) rwork[i] = rstart + i;
904   for (j=0; j<n; j++) {
905     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
906     v   += m;
907   }
908   ierr = PetscFree(rwork);CHKERRQ(ierr);
909   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
910   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
911   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
912     *matout = B;
913   } else {
914     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
915   }
916   PetscFunctionReturn(0);
917 }
918 
919 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
920 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
921 
922 PetscErrorCode MatSetUp_MPIDense(Mat A)
923 {
924   PetscErrorCode ierr;
925 
926   PetscFunctionBegin;
927   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
928   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
929   if (!A->preallocated) {
930     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
936 {
937   PetscErrorCode ierr;
938   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
939 
940   PetscFunctionBegin;
941   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
942   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
947 {
948   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
949   PetscErrorCode ierr;
950 
951   PetscFunctionBegin;
952   ierr = MatConjugate(a->A);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatRealPart_MPIDense(Mat A)
957 {
958   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   ierr = MatRealPart(a->A);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
967 {
968   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
977 {
978   PetscErrorCode    ierr;
979   PetscScalar       *x;
980   const PetscScalar *a;
981   PetscInt          lda;
982 
983   PetscFunctionBegin;
984   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
985   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
986   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
987   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
988   ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
989   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
990   ierr = MatDenseRestoreArrayRead(A,&a);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
995 
996 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
997 {
998   PetscErrorCode ierr;
999   PetscInt       i,n;
1000   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1001   PetscReal      *work;
1002 
1003   PetscFunctionBegin;
1004   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1005   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1006   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1007   if (type == NORM_2) {
1008     for (i=0; i<n; i++) work[i] *= work[i];
1009   }
1010   if (type == NORM_INFINITY) {
1011     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1012   } else {
1013     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1014   }
1015   ierr = PetscFree(work);CHKERRQ(ierr);
1016   if (type == NORM_2) {
1017     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1018   }
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1023 {
1024   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1025   PetscErrorCode ierr;
1026   PetscScalar    *a;
1027   PetscInt       m,n,i;
1028 
1029   PetscFunctionBegin;
1030   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1031   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1032   for (i=0; i<m*n; i++) {
1033     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1034   }
1035   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1040 
1041 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1042 {
1043   PetscFunctionBegin;
1044   *missing = PETSC_FALSE;
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1049 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1050 
1051 /* -------------------------------------------------------------------*/
1052 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1053                                         MatGetRow_MPIDense,
1054                                         MatRestoreRow_MPIDense,
1055                                         MatMult_MPIDense,
1056                                 /*  4*/ MatMultAdd_MPIDense,
1057                                         MatMultTranspose_MPIDense,
1058                                         MatMultTransposeAdd_MPIDense,
1059                                         0,
1060                                         0,
1061                                         0,
1062                                 /* 10*/ 0,
1063                                         0,
1064                                         0,
1065                                         0,
1066                                         MatTranspose_MPIDense,
1067                                 /* 15*/ MatGetInfo_MPIDense,
1068                                         MatEqual_MPIDense,
1069                                         MatGetDiagonal_MPIDense,
1070                                         MatDiagonalScale_MPIDense,
1071                                         MatNorm_MPIDense,
1072                                 /* 20*/ MatAssemblyBegin_MPIDense,
1073                                         MatAssemblyEnd_MPIDense,
1074                                         MatSetOption_MPIDense,
1075                                         MatZeroEntries_MPIDense,
1076                                 /* 24*/ MatZeroRows_MPIDense,
1077                                         0,
1078                                         0,
1079                                         0,
1080                                         0,
1081                                 /* 29*/ MatSetUp_MPIDense,
1082                                         0,
1083                                         0,
1084                                         MatGetDiagonalBlock_MPIDense,
1085                                         0,
1086                                 /* 34*/ MatDuplicate_MPIDense,
1087                                         0,
1088                                         0,
1089                                         0,
1090                                         0,
1091                                 /* 39*/ MatAXPY_MPIDense,
1092                                         MatCreateSubMatrices_MPIDense,
1093                                         0,
1094                                         MatGetValues_MPIDense,
1095                                         0,
1096                                 /* 44*/ 0,
1097                                         MatScale_MPIDense,
1098                                         MatShift_Basic,
1099                                         0,
1100                                         0,
1101                                 /* 49*/ MatSetRandom_MPIDense,
1102                                         0,
1103                                         0,
1104                                         0,
1105                                         0,
1106                                 /* 54*/ 0,
1107                                         0,
1108                                         0,
1109                                         0,
1110                                         0,
1111                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1112                                         MatDestroy_MPIDense,
1113                                         MatView_MPIDense,
1114                                         0,
1115                                         0,
1116                                 /* 64*/ 0,
1117                                         0,
1118                                         0,
1119                                         0,
1120                                         0,
1121                                 /* 69*/ 0,
1122                                         0,
1123                                         0,
1124                                         0,
1125                                         0,
1126                                 /* 74*/ 0,
1127                                         0,
1128                                         0,
1129                                         0,
1130                                         0,
1131                                 /* 79*/ 0,
1132                                         0,
1133                                         0,
1134                                         0,
1135                                 /* 83*/ MatLoad_MPIDense,
1136                                         0,
1137                                         0,
1138                                         0,
1139                                         0,
1140                                         0,
1141 #if defined(PETSC_HAVE_ELEMENTAL)
1142                                 /* 89*/ 0,
1143                                         0,
1144 #else
1145                                 /* 89*/ 0,
1146                                         0,
1147 #endif
1148                                         MatMatMultNumeric_MPIDense,
1149                                         0,
1150                                         0,
1151                                 /* 94*/ 0,
1152                                         0,
1153                                         0,
1154                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1155                                         0,
1156                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1157                                         0,
1158                                         0,
1159                                         MatConjugate_MPIDense,
1160                                         0,
1161                                 /*104*/ 0,
1162                                         MatRealPart_MPIDense,
1163                                         MatImaginaryPart_MPIDense,
1164                                         0,
1165                                         0,
1166                                 /*109*/ 0,
1167                                         0,
1168                                         0,
1169                                         MatGetColumnVector_MPIDense,
1170                                         MatMissingDiagonal_MPIDense,
1171                                 /*114*/ 0,
1172                                         0,
1173                                         0,
1174                                         0,
1175                                         0,
1176                                 /*119*/ 0,
1177                                         0,
1178                                         0,
1179                                         0,
1180                                         0,
1181                                 /*124*/ 0,
1182                                         MatGetColumnNorms_MPIDense,
1183                                         0,
1184                                         0,
1185                                         0,
1186                                 /*129*/ 0,
1187                                         0,
1188                                         0,
1189                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1190                                         0,
1191                                 /*134*/ 0,
1192                                         0,
1193                                         0,
1194                                         0,
1195                                         0,
1196                                 /*139*/ 0,
1197                                         0,
1198                                         0,
1199                                         0,
1200                                         0,
1201                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1202                                 /*145*/ 0,
1203                                         0,
1204                                         0
1205 };
1206 
1207 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1208 {
1209   Mat_MPIDense   *a;
1210   PetscErrorCode ierr;
1211 
1212   PetscFunctionBegin;
1213   if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
1214   mat->preallocated = PETSC_TRUE;
1215   /* Note: For now, when data is provided, we assume the user has correctly
1216      allocated the local dense storage space.  We should add error checking. */
1217 
1218   a       = (Mat_MPIDense*)mat->data;
1219   ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1220   ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1221   a->nvec = mat->cmap->n;
1222 
1223   ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1224   ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1225   ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
1226   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1227   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #if defined(PETSC_HAVE_ELEMENTAL)
1232 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1233 {
1234   Mat            mat_elemental;
1235   PetscErrorCode ierr;
1236   PetscScalar    *v;
1237   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1238 
1239   PetscFunctionBegin;
1240   if (reuse == MAT_REUSE_MATRIX) {
1241     mat_elemental = *newmat;
1242     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1243   } else {
1244     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1245     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1246     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1247     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1248     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1249   }
1250 
1251   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1252   for (i=0; i<N; i++) cols[i] = i;
1253   for (i=0; i<m; i++) rows[i] = rstart + i;
1254 
1255   /* The PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1256   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1257   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1258   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1259   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1260   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1261   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1262 
1263   if (reuse == MAT_INPLACE_MATRIX) {
1264     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1265   } else {
1266     *newmat = mat_elemental;
1267   }
1268   PetscFunctionReturn(0);
1269 }
1270 #endif
1271 
1272 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1273 {
1274   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1283 {
1284   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1293 {
1294   PetscErrorCode ierr;
1295   Mat_MPIDense   *mat;
1296   PetscInt       m,nloc,N;
1297 
1298   PetscFunctionBegin;
1299   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1300   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1301   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1302     PetscInt sum;
1303 
1304     if (n == PETSC_DECIDE) {
1305       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1306     }
1307     /* Check sum(n) = N */
1308     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1309     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1310 
1311     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1312   }
1313 
1314   /* numeric phase */
1315   mat = (Mat_MPIDense*)(*outmat)->data;
1316   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1317   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1318   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1323 {
1324   Mat_MPIDense   *a;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1329   mat->data = (void*)a;
1330   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1331 
1332   mat->insertmode = NOT_SET_VALUES;
1333   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1334   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1335 
1336   /* build cache for off-processor entries formed during assembly */
1337   a->donotstash = PETSC_FALSE;
1338 
1339   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1340 
1341   /* stuff used for matrix vector multiply */
1342   a->lvec        = 0;
1343   a->Mvctx       = 0;
1344   a->roworiented = PETSC_TRUE;
1345 
1346   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1347   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1348   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1349   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1353 #if defined(PETSC_HAVE_ELEMENTAL)
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1355 #endif
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1357   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1358   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
1363 
1364   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1365   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1366   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1367   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1368   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 /*MC
1373    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1374 
1375    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1376    and MATMPIDENSE otherwise.
1377 
1378    Options Database Keys:
1379 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1380 
1381   Level: beginner
1382 
1383 
1384 .seealso: MATSEQDENSE,MATMPIDENSE
1385 M*/
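/*
   Usage sketch added for illustration (not part of the manual page above), assuming M and N hold the
   desired global dimensions: the type resolves to MATSEQDENSE on one process and MATMPIDENSE otherwise,
   and -mat_type dense has the same effect when MatSetFromOptions() is used.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(A);CHKERRQ(ierr);
*/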
1386 
1387 /*@C
1388    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1389 
1390    Collective
1391 
1392    Input Parameters:
1393 +  B - the matrix
1394 -  data - optional location of matrix data.  Set data=NULL for PETSc
1395    to control all matrix memory allocation.
1396 
1397    Notes:
1398    The dense format is fully compatible with standard Fortran 77
1399    storage by columns.
1400 
1401    The data input variable is intended primarily for Fortran programmers
1402    who wish to allocate their own matrix memory space.  Most users should
1403    set data=NULL.
1404 
1405    Level: intermediate
1406 
1407 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1408 @*/
1409 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1410 {
1411   PetscErrorCode ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
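/*
   Usage sketch added for illustration, assuming comm, m and N are chosen by the caller: passing
   data=NULL lets PETSc allocate the local dense block itself.

      Mat B;
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
*/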
1417 
1418 /*@
1419    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1420    array provided by the user. This is useful to avoid copying an array
1421    into a matrix.
1422 
1423    Not Collective
1424 
1425    Input Parameters:
1426 +  mat - the matrix
1427 -  array - the array in column major order
1428 
1429    Notes:
1430    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1431    freed when the matrix is destroyed.
1432 
1433    Level: developer
1434 
1435 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1436 
1437 @*/
1438 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1439 {
1440   PetscErrorCode ierr;
1441   PetscFunctionBegin;
1442   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1443   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /*@
1448    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
1449 
1450    Not Collective
1451 
1452    Input Parameters:
1453 .  mat - the matrix
1454 
1455    Notes:
1456    You can only call this after a call to MatDensePlaceArray()
1457 
1458    Level: developer
1459 
1460 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1461 
1462 @*/
1463 PetscErrorCode  MatDenseResetArray(Mat mat)
1464 {
1465   PetscErrorCode ierr;
1466   PetscFunctionBegin;
1467   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1468   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1469   PetscFunctionReturn(0);
1470 }
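/*
   Usage sketch for MatDensePlaceArray()/MatDenseResetArray(), added for illustration: myvals is a
   hypothetical user-owned, column-major buffer large enough for the local dense block (local rows by
   global columns); it is never freed by PETSc.

      ierr = MatDensePlaceArray(A,myvals);CHKERRQ(ierr);
      (operations on A, e.g. MatMult(), now act on myvals)
      ierr = MatDenseResetArray(A);CHKERRQ(ierr);
*/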
1471 
1472 /*@C
1473    MatCreateDense - Creates a parallel matrix in dense format.
1474 
1475    Collective
1476 
1477    Input Parameters:
1478 +  comm - MPI communicator
1479 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1480 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1481 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1482 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1483 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1484    to control all matrix memory allocation.
1485 
1486    Output Parameter:
1487 .  A - the matrix
1488 
1489    Notes:
1490    The dense format is fully compatible with standard Fortran 77
1491    storage by columns.
1492 
1493    The data input variable is intended primarily for Fortran programmers
1494    who wish to allocate their own matrix memory space.  Most users should
1495    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1496 
1497    The user MUST specify either the local or global matrix dimensions
1498    (possibly both).
1499 
1500    Level: intermediate
1501 
1502 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1503 @*/
1504 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1505 {
1506   PetscErrorCode ierr;
1507   PetscMPIInt    size;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1511   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1512   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1513   if (size > 1) {
1514     PetscBool havedata = (PetscBool)!!data;
1515 
1516     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1517     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1518     ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
1519     if (havedata) {  /* user provided data array, so no need to assemble */
1520       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1521       (*A)->assembled = PETSC_TRUE;
1522     }
1523   } else {
1524     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1525     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1526   }
1527   PetscFunctionReturn(0);
1528 }
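/*
   Usage sketch added for illustration: a 10 x 10 parallel dense matrix with PETSc-managed storage;
   one locally owned entry is set when the process owns at least one row.

      Mat      A;
      PetscInt rstart,rend;
      ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      if (rstart < rend) {ierr = MatSetValue(A,rstart,0,1.0,INSERT_VALUES);CHKERRQ(ierr);}
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/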
1529 
1530 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1531 {
1532   Mat            mat;
1533   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   *newmat = 0;
1538   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1539   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1540   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1541   a       = (Mat_MPIDense*)mat->data;
1542 
1543   mat->factortype   = A->factortype;
1544   mat->assembled    = PETSC_TRUE;
1545   mat->preallocated = PETSC_TRUE;
1546 
1547   a->size         = oldmat->size;
1548   a->rank         = oldmat->rank;
1549   mat->insertmode = NOT_SET_VALUES;
1550   a->nvec         = oldmat->nvec;
1551   a->donotstash   = oldmat->donotstash;
1552 
1553   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1554   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1555 
1556   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1557   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1558   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1559 
1560   *newmat = mat;
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
1565 {
1566   PetscErrorCode ierr;
1567   PetscBool      isbinary;
1568 #if defined(PETSC_HAVE_HDF5)
1569   PetscBool      ishdf5;
1570 #endif
1571 
1572   PetscFunctionBegin;
1573   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
1574   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
1575   /* force binary viewer to load .info file if it has not yet done so */
1576   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1577   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1578 #if defined(PETSC_HAVE_HDF5)
1579   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
1580 #endif
1581   if (isbinary) {
1582     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
1583 #if defined(PETSC_HAVE_HDF5)
1584   } else if (ishdf5) {
1585     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
1586 #endif
1587   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
1588   PetscFunctionReturn(0);
1589 }
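/*
   Usage sketch added for illustration: loading a dense matrix from a PETSc binary file; the file name
   "matrix.dat" is a placeholder.

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/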
1590 
1591 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1592 {
1593   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1594   Mat            a,b;
1595   PetscBool      flg;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   a    = matA->A;
1600   b    = matB->A;
1601   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1602   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1607 {
1608   PetscErrorCode        ierr;
1609   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1610   Mat_TransMatMultDense *atb = a->atbdense;
1611 
1612   PetscFunctionBegin;
1613   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1614   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1615   ierr = PetscFree(atb);CHKERRQ(ierr);
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1620 {
1621   PetscErrorCode        ierr;
1622   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1623   Mat_MatTransMultDense *abt = a->abtdense;
1624 
1625   PetscFunctionBegin;
1626   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1627   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1628   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1629   ierr = PetscFree(abt);CHKERRQ(ierr);
1630   PetscFunctionReturn(0);
1631 }
1632 
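/*
   Explanatory note (added): the routine below computes C = A^T*B for two MPIDENSE matrices.  Each
   process multiplies the transpose of its locally owned row block of A with its row block of B, giving
   a full (global columns of A) x (global columns of B) contribution; MPI_Reduce_scatter then sums the
   contributions and hands every process its own rows of C.  This kernel is normally reached through
   MatTransposeMatMult() or the MatProduct interface with MATPRODUCT_AtB.
*/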
1633 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1634 {
1635   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1636   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1637   Mat_TransMatMultDense *atb = c->atbdense;
1638   PetscErrorCode ierr;
1639   MPI_Comm       comm;
1640   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1641   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1642   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1643   PetscScalar    _DOne=1.0,_DZero=0.0;
1644   PetscBLASInt   am,an,bn,aN;
1645   const PetscInt *ranges;
1646 
1647   PetscFunctionBegin;
1648   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1649   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1650   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1651 
1652   /* compute atbarray = aseq^T * bseq */
1653   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1654   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1655   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1656   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1657   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1658 
1659   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1660   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1661 
1662   /* arrange atbarray into sendbuf */
1663   k = 0;
1664   for (proc=0; proc<size; proc++) {
1665     for (j=0; j<cN; j++) {
1666       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1667     }
1668   }
1669   /* sum the contributions in atbarray into the locally owned rows of C */
1670   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1671   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1672   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1677 {
1678   PetscErrorCode        ierr;
1679   MPI_Comm              comm;
1680   PetscMPIInt           size;
1681   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
1682   Mat_MPIDense          *c;
1683   Mat_TransMatMultDense *atb;
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1687   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
1688     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1689   }
1690 
1691   /* create matrix product C */
1692   ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1693   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
1694   ierr = MatSetUp(C);CHKERRQ(ierr);
1695   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697 
1698   /* create data structure for reusing C */
1699   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1700   ierr = PetscNew(&atb);CHKERRQ(ierr);
1701   cM = C->rmap->N;
1702   ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);
1703 
1704   c                    = (Mat_MPIDense*)C->data;
1705   c->atbdense          = atb;
1706   atb->destroy         = C->ops->destroy;
1707   C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
1712 {
1713   PetscErrorCode        ierr;
1714   MPI_Comm              comm;
1715   PetscMPIInt           i, size;
1716   PetscInt              maxRows, bufsiz;
1717   Mat_MPIDense          *c;
1718   PetscMPIInt           tag;
1719   PetscInt              alg;
1720   Mat_MatTransMultDense *abt;
1721   Mat_Product           *product = C->product;
1722   PetscBool             flg;
1723 
1724   PetscFunctionBegin;
1725   /* check local size of A and B */
1726   if (A->cmap->n != B->cmap->n) {
1727     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
1728   }
1729 
1730   ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
1731   if (flg) {
1732     alg = 0;
1733   } else alg = 1;
1734 
1735   /* setup matrix product C */
1736   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
1737   ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
1738   ierr = MatSetUp(C);CHKERRQ(ierr);
1739   ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
1740   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1741   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1742 
1743   /* create data structure for reusing C */
1744   ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
1745   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1746   ierr = PetscNew(&abt);CHKERRQ(ierr);
1747   abt->tag = tag;
1748   abt->alg = alg;
1749   switch (alg) {
1750   case 1: /* alg: "cyclic" */
1751     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
1752     bufsiz = A->cmap->N * maxRows;
1753     ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
1754     break;
1755   default: /* alg: "allgatherv" */
1756     ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
1757     ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
1758     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
1759     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
1760     break;
1761   }
1762 
1763   c                               = (Mat_MPIDense*)C->data;
1764   c->abtdense                     = abt;
1765   abt->destroy                    = C->ops->destroy;
1766   C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
1767   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
1768   PetscFunctionReturn(0);
1769 }
1770 
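/*
   Explanatory note (added): "cyclic" variant of C = A*B^T.  The locally owned rows of B travel around
   the processes in a ring; in each of the size iterations a process multiplies its local rows of A
   with the block of B it currently holds, filling the matching columns of its rows of C, while the
   next block is exchanged with nonblocking MPI_Isend/MPI_Irecv.
*/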
1771 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
1772 {
1773   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1774   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1775   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1776   Mat_MatTransMultDense *abt = c->abtdense;
1777   PetscErrorCode ierr;
1778   MPI_Comm       comm;
1779   PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
1780   PetscScalar    *sendbuf, *recvbuf=0, *carray;
1781   PetscInt       i,cK=A->cmap->N,k,j,bn;
1782   PetscScalar    _DOne=1.0,_DZero=0.0;
1783   PetscBLASInt   cm, cn, ck;
1784   MPI_Request    reqs[2];
1785   const PetscInt *ranges;
1786 
1787   PetscFunctionBegin;
1788   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1789   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1790   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1791 
1792   ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
1793   bn = B->rmap->n;
1794   if (bseq->lda == bn) {
1795     sendbuf = bseq->v;
1796   } else {
1797     sendbuf = abt->buf[0];
1798     for (k = 0, i = 0; i < cK; i++) {
1799       for (j = 0; j < bn; j++, k++) {
1800         sendbuf[k] = bseq->v[i * bseq->lda + j];
1801       }
1802     }
1803   }
1804   if (size > 1) {
1805     sendto = (rank + size - 1) % size;
1806     recvfrom = (rank + size + 1) % size;
1807   } else {
1808     sendto = recvfrom = 0;
1809   }
1810   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
1811   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
1812   recvisfrom = rank;
1813   for (i = 0; i < size; i++) {
1814     /* we have finished receiving and sending, bufs can be read/modified */
1815     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
1816     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
1817 
1818     if (nextrecvisfrom != rank) {
1819       /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
1820       sendsiz = cK * bn;
1821       recvsiz = cK * nextbn;
1822       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
1823       ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
1824       ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
1825     }
1826 
1827     /* local aseq * sendbuf^T */
1828     ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
1829     carray = &cseq->v[cseq->lda * ranges[recvisfrom]];
1830     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));
1831 
1832     if (nextrecvisfrom != rank) {
1833       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
1834       ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
1835     }
1836     bn = nextbn;
1837     recvisfrom = nextrecvisfrom;
1838     sendbuf = recvbuf;
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
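/*
   Explanatory note (added): "allgatherv" variant of C = A*B^T.  Each process packs the transpose of its
   local rows of B into a buffer, MPI_Allgatherv assembles the complete B^T on every process, and a
   single BLAS gemm then produces the locally owned rows of C.
*/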
1843 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
1844 {
1845   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1846   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1847   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1848   Mat_MatTransMultDense *abt = c->abtdense;
1849   PetscErrorCode ierr;
1850   MPI_Comm       comm;
1851   PetscMPIInt    rank,size;
1852   PetscScalar    *sendbuf, *recvbuf;
1853   PetscInt       i,cK=A->cmap->N,k,j,bn;
1854   PetscScalar    _DOne=1.0,_DZero=0.0;
1855   PetscBLASInt   cm, cn, ck;
1856 
1857   PetscFunctionBegin;
1858   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1859   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1860   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1861 
1862   /* copy transpose of B into buf[0] */
1863   bn      = B->rmap->n;
1864   sendbuf = abt->buf[0];
1865   recvbuf = abt->buf[1];
1866   for (k = 0, j = 0; j < bn; j++) {
1867     for (i = 0; i < cK; i++, k++) {
1868       sendbuf[k] = bseq->v[i * bseq->lda + j];
1869     }
1870   }
1871   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
1872   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
1873   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
1874   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
1875   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1880 {
1881   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
1882   Mat_MatTransMultDense *abt = c->abtdense;
1883   PetscErrorCode        ierr;
1884 
1885   PetscFunctionBegin;
1886   switch (abt->alg) {
1887   case 1:
1888     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
1889     break;
1890   default:
1891     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
1892     break;
1893   }
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1898 {
1899   PetscErrorCode   ierr;
1900   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1901   Mat_MatMultDense *ab = a->abdense;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1905   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1906   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1907 
1908   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1909   ierr = PetscFree(ab);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
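/*
   Explanatory note (added): when PETSc is configured with Elemental, the C = A*B product of two
   MPIDENSE matrices is performed by converting A and B to MATELEMENTAL, multiplying in that format,
   and converting the result back into the MPIDENSE matrix C.
*/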
1913 #if defined(PETSC_HAVE_ELEMENTAL)
1914 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1915 {
1916   PetscErrorCode   ierr;
1917   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
1918   Mat_MatMultDense *ab=c->abdense;
1919 
1920   PetscFunctionBegin;
1921   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
1922   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
1923   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
1924   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
1929 {
1930   PetscErrorCode   ierr;
1931   Mat              Ae,Be,Ce;
1932   Mat_MPIDense     *c;
1933   Mat_MatMultDense *ab;
1934 
1935   PetscFunctionBegin;
1936   /* check local size of A and B */
1937   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1938     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
1939   }
1940 
1941   /* create elemental matrices Ae and Be */
1942   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
1943   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1944   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
1945   ierr = MatSetUp(Ae);CHKERRQ(ierr);
1946   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1947 
1948   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
1949   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
1950   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
1951   ierr = MatSetUp(Be);CHKERRQ(ierr);
1952   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1953 
1954   /* compute symbolic Ce = Ae*Be */
1955   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
1956   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
1957 
1958   /* setup C */
1959   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
1960   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
1961   ierr = MatSetUp(C);CHKERRQ(ierr);
1962 
1963   /* create data structure for reusing C */
1964   ierr = PetscNew(&ab);CHKERRQ(ierr);
1965   c                  = (Mat_MPIDense*)C->data;
1966   c->abdense         = ab;
1967 
1968   ab->Ae             = Ae;
1969   ab->Be             = Be;
1970   ab->Ce             = Ce;
1971   ab->destroy        = C->ops->destroy;
1972   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1973   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1974   C->ops->productnumeric = MatProductNumeric_AB;
1975   PetscFunctionReturn(0);
1976 }
1977 #endif
1978 /* ----------------------------------------------- */
1979 #if defined(PETSC_HAVE_ELEMENTAL)
1980 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
1981 {
1982   PetscFunctionBegin;
1983   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
1984   C->ops->productsymbolic = MatProductSymbolic_AB;
1985   C->ops->productnumeric  = MatProductNumeric_AB;
1986   PetscFunctionReturn(0);
1987 }
1988 #endif
1989 
1990 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
1991 {
1992   PetscErrorCode ierr;
1993   Mat_Product    *product = C->product;
1994 
1995   PetscFunctionBegin;
1996   ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
1997   C->ops->productnumeric          = MatProductNumeric_AtB;
1998   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2003 {
2004   Mat_Product *product = C->product;
2005   Mat         A = product->A,B=product->B;
2006 
2007   PetscFunctionBegin;
2008   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2009     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2010 
2011   C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
2012   PetscFunctionReturn(0);
2013 }
2014 
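/*
   Explanatory note (added): the C = A*B^T product offers the "allgatherv" (default) and "cyclic"
   algorithms implemented above.  A hedged usage sketch:

      ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);

   with the algorithm selected at runtime via -matmattransmult_mpidense_mpidense_via cyclic, or
   -matproduct_abt_mpidense_mpidense_via cyclic when the MatProduct interface is used directly.
*/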
2015 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2016 {
2017   PetscErrorCode ierr;
2018   Mat_Product    *product = C->product;
2019   const char     *algTypes[2] = {"allgatherv","cyclic"};
2020   PetscInt       alg,nalg = 2;
2021   PetscBool      flg = PETSC_FALSE;
2022 
2023   PetscFunctionBegin;
2024   /* Set default algorithm */
2025   alg = 0; /* default is allgatherv */
2026   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2027   if (flg) {
2028     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2029   }
2030 
2031   /* Get runtime option */
2032   if (product->api_user) {
2033     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2034     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2035     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2036   } else {
2037     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2038     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2039     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2040   }
2041   if (flg) {
2042     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2043   }
2044 
2045   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2046   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2047   C->ops->productnumeric           = MatProductNumeric_ABt;
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2052 {
2053   PetscErrorCode ierr;
2054   Mat_Product    *product = C->product;
2055 
2056   PetscFunctionBegin;
2057   switch (product->type) {
2058 #if defined(PETSC_HAVE_ELEMENTAL)
2059   case MATPRODUCT_AB:
2060     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2061     break;
2062 #endif
2063   case MATPRODUCT_AtB:
2064     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2065     break;
2066   case MATPRODUCT_ABt:
2067     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2068     break;
2069   default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
2070   }
2071   PetscFunctionReturn(0);
2072 }
2073