xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 
7 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
8 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9 #include <petscblaslapack.h>
10 
11 /*@
12 
13       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
14               matrix that represents the operator. For sequential matrices it returns itself.
15 
16     Input Parameter:
17 .      A - the Seq or MPI dense matrix
18 
19     Output Parameter:
20 .      B - the inner matrix
21 
22     Level: intermediate
23 
24 @*/
25 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
26 {
27   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
28   PetscErrorCode ierr;
29   PetscBool      flg;
30 
31   PetscFunctionBegin;
32   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
33   if (flg) *B = mat->A;
34   else *B = A;
35   PetscFunctionReturn(0);
36 }
37 
38 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
39 {
40   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
41   PetscErrorCode ierr;
42   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
43 
44   PetscFunctionBegin;
45   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
46   lrow = row - rstart;
47   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
52 {
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
57   if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);}
58   PetscFunctionReturn(0);
59 }
60 
/* Returns the locally owned diagonal block of the square parallel matrix A as a
   sequential dense matrix whose storage ALIASES the local array of mdn->A (no
   copy is made).  The block is cached on A under the name "DiagonalBlock" and
   reused on subsequent calls. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  Mat            B;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");

  /* Reuse a previously built diagonal block if one is cached on A */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* array+m*rstart is the first entry of the diagonal block in column-major
       storage -- NOTE(review): this assumes the inner matrix's leading
       dimension equals m; confirm for matrices with a placed/custom array */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Composing bumps B's reference count, so the MatDestroy() below only
       drops our local handle; the cached block lives as long as A does */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
90 
/* Inserts/adds a logically dense m-by-n block of values at global indices.
   Locally owned rows go straight into the inner sequential matrix; off-process
   rows are stashed for communication during assembly (unless donotstash is
   set, in which case they are silently dropped).  The values array v is read
   row-major when roworiented, column-major otherwise. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* Locally owned row: set directly in the inner sequential matrix */
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* Off-process row: queue it in the stash for MatAssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
124 
125 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
126 {
127   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
128   PetscErrorCode ierr;
129   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
134     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       for (j=0; j<n; j++) {
138         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
139         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
140         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
141       }
142     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
143   }
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
148 {
149   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
150   PetscErrorCode ierr;
151 
152   PetscFunctionBegin;
153   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
154   PetscFunctionReturn(0);
155 }
156 
157 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar *array[])
158 {
159   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
160   PetscErrorCode ierr;
161 
162   PetscFunctionBegin;
163   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
164   PetscFunctionReturn(0);
165 }
166 
167 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar *array[])
168 {
169   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
170   PetscErrorCode ierr;
171 
172   PetscFunctionBegin;
173   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar array[])
178 {
179   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
180   PetscErrorCode ierr;
181 
182   PetscFunctionBegin;
183   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
184   PetscFunctionReturn(0);
185 }
186 
187 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
188 {
189   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
190   PetscErrorCode ierr;
191 
192   PetscFunctionBegin;
193   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
194   PetscFunctionReturn(0);
195 }
196 
/* Extracts the submatrix given by (isrow, iscol).  The column IS is gathered
   onto every process so each rank copies entire (global) columns of its local
   row slice.  Assumes the submatrix keeps the same row partitioning as A. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense   *mat  = (Mat_MPIDense*)A->data,*newmatd;
  Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  PetscScalar    *av,*bv,*v = lmat->v;
  Mat            newmat;
  IS             iscol_local;
  MPI_Comm       comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* Every process needs all requested column indices, since each owns full rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     bv walks the destination contiguously, which assumes the destination's
     leading dimension equals nrows (the freshly preallocated local size). */
  newmatd = (Mat_MPIDense*)newmat->data;
  bv      = ((Mat_SeqDense*)newmatd->A->data)->v;

  for (i=0; i<Ncols; i++) {
    /* av points at the top of source column icol[i] (column-major, stride lda) */
    av = v + ((Mat_SeqDense*)mat->A->data)->lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
263 
264 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar *array[])
265 {
266   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar *array[])
275 {
276   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 
/* Starts assembly: verifies all ranks use the same InsertMode and launches the
   communication that ships stashed off-process entries to their owners. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  /* bitwise OR yields ADD_VALUES|INSERT_VALUES exactly when ranks disagree */
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
304 
/* Finishes assembly: drains stashed entries arriving from other ranks,
   inserting them row by row, then assembles the inner sequential matrix and
   (on first final assembly) builds the scatter used by MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /*  wait on receives */
  while (1) {
    /* flg is 0 once every incoming stash message has been consumed */
    ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;

    for (i=0; i<n;) {
      /* Now identify the consecutive vals belonging to the same row */
      for (j=i,rstart=row[j]; j<n; j++) {
        if (row[j] != rstart) break;
      }
      /* note: in the else case j == n, so both branches compute j-i */
      if (j < n) ncols = j-i;
      else       ncols = n-i;
      /* Now assemble all these values with a single function call */
      ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
      i    = j;
    }
  }
  ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* Set up the vector scatter needed by the matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
341 
342 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
343 {
344   PetscErrorCode ierr;
345   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
346 
347   PetscFunctionBegin;
348   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
/* the code does not do the diagonal entries correctly unless the
   matrix is square and the column and row ownerships are identical.
   This is a BUG. The only way to fix it seems to be to access
   mdn->A and mdn->B directly and not through the MatZeroRows()
   routine.
*/
/* Zeroes the given global rows (each rank may list any rows, owned or not),
   optionally placing `diag` on the diagonal and fixing the right-hand side b
   so that b[row] = diag*x[row].  Row indices are first routed to their owning
   ranks with a hand-rolled Irecv/Isend exchange, then the local rows are
   zeroed through the inner sequential matrix. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values;
  PetscMPIInt       n,imdex,rank = l->rank,size = l->size;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  PetscBool         found;
  const PetscScalar *xx;
  PetscScalar       *bb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->N != A->cmap->N) SETERRQ(comm,PETSC_ERR_SUP,"Only handles square matrices");
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only handles matrices with identical column and row ownership");
  /*  first count number of contributors to each processor.
      sizes[2*j] counts rows destined for rank j; sizes[2*j+1] is a 0/1 flag
      meaning "at least one message goes to rank j" */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr);  /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++; sizes[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:  each incoming message holds at most nmax row indices */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1(N+1,&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1(nsends+1,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1(size+1,&starts);CHKERRQ(ierr);

  /* first pass: bucket the row indices by destination (starts[] is advanced
     while packing, so it is rebuilt below before the sends) */
  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs;
  slen  = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter: convert received global rows to
     local indices (subtract this rank's first row, base) */
  ierr  = PetscMalloc1(slen+1,&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* fix right hand side if needed */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if (diag != 0.0) {
    Mat_SeqDense *ll = (Mat_SeqDense*)l->A->data;
    PetscInt     m   = ll->lda, i;

    /* place diag on the diagonal of the local block; the column offset uses
       A->cmap->rstart, valid because row/column ownership was checked equal */
    for (i=0; i<slen; i++) {
      ll->v[lrows[i] + m*(A->cmap->rstart + lrows[i])] = diag;
    }
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
492 
493 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
494 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
495 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
496 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
497 
/* yy = A*xx.  Gathers the full input vector into the local work vector lvec
   via the scatter built in MatSetUpMultiply_MPIDense, then applies the local
   dense block. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMult_SeqDense(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
509 
/* zz = yy + A*xx.  Same scatter-then-local-multiply structure as
   MatMult_MPIDense, with the addition performed by the sequential kernel. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
521 
/* yy = A^T*xx.  Each rank multiplies its local block transposed into lvec,
   then the partial results are summed into yy with a reverse ADD scatter;
   yy must be zeroed first so the reverse scatter accumulates correctly. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscScalar    zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(yy,zero);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
535 
/* zz = yy + A^T*xx.  Starts from a copy of yy in zz, then accumulates the
   local transposed products into zz with a reverse ADD scatter. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = MatMultTranspose_SeqDense(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
548 
/* Extracts the locally owned portion of the diagonal into v.  Reads the raw
   column-major local array directly: entry (i, rstart+i) lives at
   v[rstart*m + i*m + i].  NOTE(review): the stride used is m = A->rmap->n,
   which assumes the local leading dimension equals m — confirm for matrices
   created with a custom lda or placed array. */
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       len,i,n,m = A->rmap->n,radd;
  PetscScalar    *x,zero = 0.0;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  /* only the first min(local rows, local cols) diagonal entries exist locally */
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m; /* offset of the diagonal block's first column */
  for (i=0; i<len; i++) {
    x[i] = aloc->v[radd + i*m + i];
  }
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
570 
/* Destroys the parallel dense matrix: releases the stash, the inner sequential
   matrix, the multiply work vector and scatter, the private data struct, and
   clears every composed method/function so no dangling pointers remain. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* Detach every composed function registered at creation/convert time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
612 
613 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
614 
615 #include <petscdraw.h>
/* Views the matrix through an ASCII, draw, or socket viewer.  INFO formats are
   handled per-rank; otherwise the whole matrix is gathered onto rank 0 (via a
   temporary MPIDENSE matrix with all rows on rank 0) and viewed sequentially. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no matrix entries are printed */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns all rows of the temporary matrix; others own none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 actually views its (complete) local block */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
697 
698 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
699 {
700   PetscErrorCode ierr;
701   PetscBool      iascii,isbinary,isdraw,issocket;
702 
703   PetscFunctionBegin;
704   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
705   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
708 
709   if (iascii || issocket || isdraw) {
710     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
711   } else if (isbinary) {
712     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
713   }
714   PetscFunctionReturn(0);
715 }
716 
717 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
718 {
719   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
720   Mat            mdn  = mat->A;
721   PetscErrorCode ierr;
722   PetscLogDouble isend[5],irecv[5];
723 
724   PetscFunctionBegin;
725   info->block_size = 1.0;
726 
727   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
728 
729   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
730   isend[3] = info->memory;  isend[4] = info->mallocs;
731   if (flag == MAT_LOCAL) {
732     info->nz_used      = isend[0];
733     info->nz_allocated = isend[1];
734     info->nz_unneeded  = isend[2];
735     info->memory       = isend[3];
736     info->mallocs      = isend[4];
737   } else if (flag == MAT_GLOBAL_MAX) {
738     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
739 
740     info->nz_used      = irecv[0];
741     info->nz_allocated = irecv[1];
742     info->nz_unneeded  = irecv[2];
743     info->memory       = irecv[3];
744     info->mallocs      = irecv[4];
745   } else if (flag == MAT_GLOBAL_SUM) {
746     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
747 
748     info->nz_used      = irecv[0];
749     info->nz_allocated = irecv[1];
750     info->nz_unneeded  = irecv[2];
751     info->memory       = irecv[3];
752     info->mallocs      = irecv[4];
753   }
754   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
755   info->fill_ratio_needed = 0;
756   info->factor_mallocs    = 0;
757   PetscFunctionReturn(0);
758 }
759 
/* Applies a matrix option: some are forwarded to the inner sequential matrix,
   MAT_IGNORE_OFF_PROC_ENTRIES toggles stashing, and structural hints that are
   meaningless for dense storage are logged and ignored. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the inner sequential matrix */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded here (affects MatSetValues packing) and forwarded */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues drops off-process entries instead of stashing */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
800 
801 
/* Compute A = diag(ll) * A * diag(rr) in place, scaling the local dense block
   directly.  The local storage is walked column-major; the code assumes the
   local array is contiguous with leading dimension m — NOTE(review): it reads
   mat->v directly rather than honoring a possibly larger lda, confirm this
   invariant holds for matrices created through this file's preallocation path. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense      *mat = (Mat_SeqDense*)mdn->A->data;
  const PetscScalar *l,*r;
  PetscScalar       x,*v;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    /* left scaling: row i of the local block is multiplied by l[i] */
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = mat->v + i;                                /* start of row i, column-major stride m */
      for (j=0; j<n; j++) { (*v) *= x; v+= m;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  if (rr) {
    /* right scaling: rr is distributed by columns, so gather the full column
       scaling into the local work vector lvec first */
    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = mat->v + i*m;                              /* column i is contiguous */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArrayRead(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(n*m);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
841 
/* Compute a matrix norm of the parallel dense matrix.
   Frobenius: sum of |a_ij|^2 over local entries, MPI-summed, then sqrt.
   1-norm:    per-column absolute sums computed locally (each process owns
              full-width rows, so the local block has all N columns),
              MPI-summed per column, then the max is taken.
   Infinity:  local max row sum, MPI-maxed (rows are not split, so this is exact).
   The 2-norm is not supported. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      sum = 0.0;
  PetscScalar    *v  = mat->v;

  PetscFunctionBegin;
  if (mdn->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* walk the local storage as one flat array of rmap->n * cmap->n entries */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp[j] accumulates the local absolute column sum of global column j */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = mat->v;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
886 
/* Form the transpose of the parallel dense matrix A.
   Each process scatters its locally owned rows (columns of the result) into a
   freshly created (or reused) N x M matrix via MatSetValues; stash-based
   assembly routes the entries to the owning processes.  For
   MAT_INPLACE_MATRIX the temporary result replaces A's header. */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* build the result with swapped sizes; in-place also needs a temporary */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* rwork holds the global row indices owned by this process; local column j
     of A (contiguous, length m) becomes global row j of B */
  m    = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += m;
  }
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: move B's contents into A and destroy the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
924 
925 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
926 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
927 
928 PetscErrorCode MatSetUp_MPIDense(Mat A)
929 {
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   ierr =  MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
934   PetscFunctionReturn(0);
935 }
936 
937 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
938 {
939   PetscErrorCode ierr;
940   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
941 
942   PetscFunctionBegin;
943   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
944   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
945   PetscFunctionReturn(0);
946 }
947 
948 PetscErrorCode  MatConjugate_MPIDense(Mat mat)
949 {
950   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
951   PetscErrorCode ierr;
952 
953   PetscFunctionBegin;
954   ierr = MatConjugate(a->A);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 PetscErrorCode MatRealPart_MPIDense(Mat A)
959 {
960   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
961   PetscErrorCode ierr;
962 
963   PetscFunctionBegin;
964   ierr = MatRealPart(a->A);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
969 {
970   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
979 {
980   PetscErrorCode    ierr;
981   PetscScalar       *x;
982   const PetscScalar *a;
983   PetscInt          lda;
984 
985   PetscFunctionBegin;
986   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
987   ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
988   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
989   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
990   ierr = PetscArraycpy(x,a+col*lda,A->rmap->n);CHKERRQ(ierr);
991   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
992   ierr = MatDenseGetArrayRead(A,&a);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
997 
998 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
999 {
1000   PetscErrorCode ierr;
1001   PetscInt       i,n;
1002   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1003   PetscReal      *work;
1004 
1005   PetscFunctionBegin;
1006   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1007   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1008   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1009   if (type == NORM_2) {
1010     for (i=0; i<n; i++) work[i] *= work[i];
1011   }
1012   if (type == NORM_INFINITY) {
1013     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1014   } else {
1015     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1016   }
1017   ierr = PetscFree(work);CHKERRQ(ierr);
1018   if (type == NORM_2) {
1019     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1020   }
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 static PetscErrorCode  MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1025 {
1026   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1027   PetscErrorCode ierr;
1028   PetscScalar    *a;
1029   PetscInt       m,n,i;
1030 
1031   PetscFunctionBegin;
1032   ierr = MatGetSize(d->A,&m,&n);CHKERRQ(ierr);
1033   ierr = MatDenseGetArray(d->A,&a);CHKERRQ(ierr);
1034   for (i=0; i<m*n; i++) {
1035     ierr = PetscRandomGetValue(rctx,a+i);CHKERRQ(ierr);
1036   }
1037   ierr = MatDenseRestoreArray(d->A,&a);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1042 
/* A dense matrix stores every entry explicitly, so a diagonal entry can never
   be missing; d is deliberately left untouched since there is no missing
   index to report. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1049 
1050 static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat,Mat,MatReuse,PetscReal,Mat*);
1051 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat*);
1052 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1053 
1054 /* -------------------------------------------------------------------*/
1055 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1056                                         MatGetRow_MPIDense,
1057                                         MatRestoreRow_MPIDense,
1058                                         MatMult_MPIDense,
1059                                 /*  4*/ MatMultAdd_MPIDense,
1060                                         MatMultTranspose_MPIDense,
1061                                         MatMultTransposeAdd_MPIDense,
1062                                         0,
1063                                         0,
1064                                         0,
1065                                 /* 10*/ 0,
1066                                         0,
1067                                         0,
1068                                         0,
1069                                         MatTranspose_MPIDense,
1070                                 /* 15*/ MatGetInfo_MPIDense,
1071                                         MatEqual_MPIDense,
1072                                         MatGetDiagonal_MPIDense,
1073                                         MatDiagonalScale_MPIDense,
1074                                         MatNorm_MPIDense,
1075                                 /* 20*/ MatAssemblyBegin_MPIDense,
1076                                         MatAssemblyEnd_MPIDense,
1077                                         MatSetOption_MPIDense,
1078                                         MatZeroEntries_MPIDense,
1079                                 /* 24*/ MatZeroRows_MPIDense,
1080                                         0,
1081                                         0,
1082                                         0,
1083                                         0,
1084                                 /* 29*/ MatSetUp_MPIDense,
1085                                         0,
1086                                         0,
1087                                         MatGetDiagonalBlock_MPIDense,
1088                                         0,
1089                                 /* 34*/ MatDuplicate_MPIDense,
1090                                         0,
1091                                         0,
1092                                         0,
1093                                         0,
1094                                 /* 39*/ MatAXPY_MPIDense,
1095                                         MatCreateSubMatrices_MPIDense,
1096                                         0,
1097                                         MatGetValues_MPIDense,
1098                                         0,
1099                                 /* 44*/ 0,
1100                                         MatScale_MPIDense,
1101                                         MatShift_Basic,
1102                                         0,
1103                                         0,
1104                                 /* 49*/ MatSetRandom_MPIDense,
1105                                         0,
1106                                         0,
1107                                         0,
1108                                         0,
1109                                 /* 54*/ 0,
1110                                         0,
1111                                         0,
1112                                         0,
1113                                         0,
1114                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1115                                         MatDestroy_MPIDense,
1116                                         MatView_MPIDense,
1117                                         0,
1118                                         0,
1119                                 /* 64*/ 0,
1120                                         0,
1121                                         0,
1122                                         0,
1123                                         0,
1124                                 /* 69*/ 0,
1125                                         0,
1126                                         0,
1127                                         0,
1128                                         0,
1129                                 /* 74*/ 0,
1130                                         0,
1131                                         0,
1132                                         0,
1133                                         0,
1134                                 /* 79*/ 0,
1135                                         0,
1136                                         0,
1137                                         0,
1138                                 /* 83*/ MatLoad_MPIDense,
1139                                         0,
1140                                         0,
1141                                         0,
1142                                         0,
1143                                         0,
1144 #if defined(PETSC_HAVE_ELEMENTAL)
1145                                 /* 89*/ MatMatMult_MPIDense_MPIDense,
1146                                         MatMatMultSymbolic_MPIDense_MPIDense,
1147 #else
1148                                 /* 89*/ 0,
1149                                         0,
1150 #endif
1151                                         MatMatMultNumeric_MPIDense,
1152                                         0,
1153                                         0,
1154                                 /* 94*/ 0,
1155                                         MatMatTransposeMult_MPIDense_MPIDense,
1156                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1157                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1158                                         0,
1159                                 /* 99*/ 0,
1160                                         0,
1161                                         0,
1162                                         MatConjugate_MPIDense,
1163                                         0,
1164                                 /*104*/ 0,
1165                                         MatRealPart_MPIDense,
1166                                         MatImaginaryPart_MPIDense,
1167                                         0,
1168                                         0,
1169                                 /*109*/ 0,
1170                                         0,
1171                                         0,
1172                                         MatGetColumnVector_MPIDense,
1173                                         MatMissingDiagonal_MPIDense,
1174                                 /*114*/ 0,
1175                                         0,
1176                                         0,
1177                                         0,
1178                                         0,
1179                                 /*119*/ 0,
1180                                         0,
1181                                         0,
1182                                         0,
1183                                         0,
1184                                 /*124*/ 0,
1185                                         MatGetColumnNorms_MPIDense,
1186                                         0,
1187                                         0,
1188                                         0,
1189                                 /*129*/ 0,
1190                                         MatTransposeMatMult_MPIDense_MPIDense,
1191                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1192                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1193                                         0,
1194                                 /*134*/ 0,
1195                                         0,
1196                                         0,
1197                                         0,
1198                                         0,
1199                                 /*139*/ 0,
1200                                         0,
1201                                         0,
1202                                         0,
1203                                         0,
1204                                 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIDense
1205 };
1206 
/* Type-specific preallocation: sets up the row/column layouts and creates the
   local sequential dense block.  The local block has this process's rows but
   the FULL global column width (rmap->n x cmap->N); data, if non-NULL, is a
   user-supplied column-major array used as the local block's storage. */
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (mat->preallocated) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix has already been preallocated");
  mat->preallocated = PETSC_TRUE;
  /* Note:  For now, when data is specified above, this assumes the user correctly
   allocates the local dense storage space.  We should add error checking. */

  a       = (Mat_MPIDense*)mat->data;
  ierr    = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr    = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  a->nvec = mat->cmap->n;

  /* the local block is sequential, sized local-rows x global-columns */
  ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
  ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(a->A,MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1230 
1231 #if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a MATMPIDENSE matrix to MATELEMENTAL by inserting every locally
   owned row into a (new or reused) Elemental matrix and assembling. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing Elemental matrix; clear it since values are ADDed below */
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the local dense block is column-major, so insert column-oriented */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* rows: the global indices owned by this process; cols: all global columns */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's guts with the Elemental matrix, destroying the old contents */
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1270 #endif
1271 
1272 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1273 {
1274   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1283 {
1284   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
/* Build (or refill) a parallel dense matrix on comm whose local row block is a
   copy of the sequential matrix inmat; n is the number of local columns
   (PETSC_DECIDE to have it computed).  On MAT_INITIAL_MATRIX the global matrix
   is created; the numeric phase copies inmat into the local block. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1321 
1322 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1323 {
1324   Mat_MPIDense   *a;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
1329   mat->data = (void*)a;
1330   ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1331 
1332   mat->insertmode = NOT_SET_VALUES;
1333   ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
1334   ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);
1335 
1336   /* build cache for off array entries formed */
1337   a->donotstash = PETSC_FALSE;
1338 
1339   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);
1340 
1341   /* stuff used for matrix vector multiply */
1342   a->lvec        = 0;
1343   a->Mvctx       = 0;
1344   a->roworiented = PETSC_TRUE;
1345 
1346   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
1347   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
1348   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
1349   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
1350   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
1351   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
1352   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
1353 #if defined(PETSC_HAVE_ELEMENTAL)
1354   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
1355 #endif
1356   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
1357   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1358   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1359   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1360   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_nest_mpidense_C",MatMatMult_Nest_Dense);CHKERRQ(ierr);
1361   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
1362   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);
1363 
1364   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMult_mpiaij_mpidense_C",MatTransposeMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
1365   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
1366   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
1367   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
1368   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
1369   ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 /*MC
1374    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1375 
1376    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1377    and MATMPIDENSE otherwise.
1378 
1379    Options Database Keys:
1380 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1381 
1382   Level: beginner
1383 
1384 
1385 .seealso: MATSEQDENSE,MATMPIDENSE
1386 M*/
1387 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatMPIDenseSetPreallocation_MPIDense when B is MATMPIDENSE;
     silently does nothing for other types (PetscTryMethod) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1418 
1419 /*@
1420    MatDensePlaceArray - Allows one to replace the array in a dense array with an
1421    array provided by the user. This is useful to avoid copying an array
1422    into a matrix
1423 
1424    Not Collective
1425 
1426    Input Parameters:
1427 +  mat - the matrix
1428 -  array - the array in column major order
1429 
1430    Notes:
1431    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1432    freed when the matrix is destroyed.
1433 
1434    Level: developer
1435 
1436 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1437 
1438 @*/
1439 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar array[])
1440 {
1441   PetscErrorCode ierr;
1442   PetscFunctionBegin;
1443   ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
1444   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 /*@
1449    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1450 
1451    Not Collective
1452 
1453    Input Parameters:
1454 .  mat - the matrix
1455 
1456    Notes:
1457    You can only call this after a call to MatDensePlaceArray()
1458 
1459    Level: developer
1460 
1461 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1462 
1463 @*/
1464 PetscErrorCode  MatDenseResetArray(Mat mat)
1465 {
1466   PetscErrorCode ierr;
1467   PetscFunctionBegin;
1468   ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
1469   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
/*@C
   MatCreateDense - Creates a parallel matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user data array is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* single process: fall back to the sequential dense type */
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1528 
1529 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1530 {
1531   Mat            mat;
1532   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   *newmat = 0;
1537   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1538   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1539   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1540   a       = (Mat_MPIDense*)mat->data;
1541 
1542   mat->factortype   = A->factortype;
1543   mat->assembled    = PETSC_TRUE;
1544   mat->preallocated = PETSC_TRUE;
1545 
1546   a->size         = oldmat->size;
1547   a->rank         = oldmat->rank;
1548   mat->insertmode = NOT_SET_VALUES;
1549   a->nvec         = oldmat->nvec;
1550   a->donotstash   = oldmat->donotstash;
1551 
1552   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1553   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1554 
1555   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1556   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1557   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1558 
1559   *newmat = mat;
1560   PetscFunctionReturn(0);
1561 }
1562 
/* Loads an MPIDENSE matrix from a viewer; binary viewers are always supported,
   HDF5 viewers only when PETSc was configured with HDF5. Any other viewer type errors. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
1589 
1590 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1591 {
1592   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1593   Mat            a,b;
1594   PetscBool      flg;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   a    = matA->A;
1599   b    = matB->A;
1600   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
1601   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
1606 {
1607   PetscErrorCode        ierr;
1608   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1609   Mat_TransMatMultDense *atb = a->atbdense;
1610 
1611   PetscFunctionBegin;
1612   ierr = PetscFree3(atb->sendbuf,atb->atbarray,atb->recvcounts);CHKERRQ(ierr);
1613   ierr = (atb->destroy)(A);CHKERRQ(ierr);
1614   ierr = PetscFree(atb);CHKERRQ(ierr);
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
1619 {
1620   PetscErrorCode        ierr;
1621   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
1622   Mat_MatTransMultDense *abt = a->abtdense;
1623 
1624   PetscFunctionBegin;
1625   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
1626   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
1627   ierr = (abt->destroy)(A);CHKERRQ(ierr);
1628   ierr = PetscFree(abt);CHKERRQ(ierr);
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1633 {
1634   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1635   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1636   Mat_TransMatMultDense *atb = c->atbdense;
1637   PetscErrorCode ierr;
1638   MPI_Comm       comm;
1639   PetscMPIInt    rank,size,*recvcounts=atb->recvcounts;
1640   PetscScalar    *carray,*atbarray=atb->atbarray,*sendbuf=atb->sendbuf;
1641   PetscInt       i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
1642   PetscScalar    _DOne=1.0,_DZero=0.0;
1643   PetscBLASInt   am,an,bn,aN;
1644   const PetscInt *ranges;
1645 
1646   PetscFunctionBegin;
1647   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1648   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1650 
1651   /* compute atbarray = aseq^T * bseq */
1652   ierr = PetscBLASIntCast(a->A->cmap->n,&an);CHKERRQ(ierr);
1653   ierr = PetscBLASIntCast(b->A->cmap->n,&bn);CHKERRQ(ierr);
1654   ierr = PetscBLASIntCast(a->A->rmap->n,&am);CHKERRQ(ierr);
1655   ierr = PetscBLASIntCast(A->cmap->N,&aN);CHKERRQ(ierr);
1656   PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&an,&bn,&am,&_DOne,aseq->v,&aseq->lda,bseq->v,&bseq->lda,&_DZero,atbarray,&aN));
1657 
1658   ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
1659   for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
1660 
1661   /* arrange atbarray into sendbuf */
1662   k = 0;
1663   for (proc=0; proc<size; proc++) {
1664     for (j=0; j<cN; j++) {
1665       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
1666     }
1667   }
1668   /* sum all atbarray to local values of C */
1669   ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
1670   ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
1671   ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
/* Symbolic phase of C = A^T*B for MPIDENSE matrices: allocates the product matrix
   (local size A->cmap->n x B->cmap->n) and the reuse context holding the GEMM output
   buffer, the repacked send buffer, and the reduce-scatter receive counts.
   Installs a custom destroy so the context is freed with C.
   Note: fill is unused for dense products. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to own the same rows on every rank */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM = Cdense->rmap->N;
  /* sendbuf and atbarray each hold the full global-size product; recvcounts has one entry per rank */
  ierr = PetscMalloc3(cM*cN,&atb->sendbuf,cM*cN,&atb->atbarray,size,&atb->recvcounts);CHKERRQ(ierr);

  c                    = (Mat_MPIDense*)Cdense->data;
  c->atbdense          = atb;
  /* chain the destroy so the context is released when Cdense is destroyed */
  atb->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
1712 
/* Driver for C = A^T*B: runs the symbolic phase only on MAT_INITIAL_MATRIX,
   then always performs the numeric product into *C. */
PetscErrorCode MatTransposeMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatTransposeMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1724 
/* Symbolic phase of C = A*B^T for MPIDENSE matrices: creates the product matrix,
   selects the communication algorithm (allgatherv by default, cyclic via the
   -matmattransmult_mpidense_mpidense_via option), and allocates the algorithm's
   communication buffers in a reuse context attached to C.
   Note: fill is unused for dense products. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat *C)
{
  PetscErrorCode        ierr;
  Mat                   Cdense;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  const char            *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt              alg, nalg = 2;
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  alg = 0; /* default is allgatherv */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[0],&alg,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  /* A*B^T requires A and B to have the same global column count */
  if (A->cmap->N != B->cmap->N) {
    SETERRQ2(comm,PETSC_ERR_ARG_SIZ,"Matrix global column dimensions are incompatible, A (%D) != B (%D)",A->cmap->N,B->cmap->N);
  }

  /* create matrix product Cdense */
  ierr = MatCreate(comm,&Cdense);CHKERRQ(ierr);
  ierr = MatSetSizes(Cdense,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Cdense,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cdense,NULL);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic algorithm's messages from colliding with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)Cdense, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cdense,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *C   = Cdense;

  /* create data structure for reuse Cdense */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1:
    /* cyclic: two ping-pong buffers each large enough for the largest rank's row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default:
    /* allgatherv: buf[0] holds the local transposed B block, buf[1] the gathered global one */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c                    = (Mat_MPIDense*)Cdense->data;
  c->abtdense          = abt;
  /* chain the destroy so the context is released when Cdense is destroyed */
  abt->destroy         = Cdense->ops->destroy;
  Cdense->ops->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
1783 
/* Numeric phase of C = A*B^T using a cyclic (ring) exchange: each rank's packed B
   block circulates around the communicator; on each of the `size` steps a rank
   multiplies its local A against the B block currently in hand (overlapping
   communication of the next block with the GEMM) and writes the corresponding
   column slab of its local C. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
  Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar    *sendbuf, *recvbuf=0, *carray;
  PetscInt       i,cK=A->cmap->N,k,j,bn;
  PetscScalar    _DOne=1.0,_DZero=0.0;
  PetscBLASInt   cm, cn, ck;
  MPI_Request    reqs[2];
  const PetscInt *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn = B->rmap->n;
  /* if the local B block is contiguous (lda == bn) send it in place, otherwise compact it into buf[0] */
  if (bseq->lda == bn) {
    sendbuf = bseq->v;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bseq->v[i * bseq->lda + j];
      }
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;  /* the block in hand initially originated here */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];  /* alternate ping-pong buffers */
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    carray = &cseq->v[cseq->lda * ranges[recvisfrom]];  /* column slab of C owned by the block's origin */
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,sendbuf,&cn,&_DZero,carray,&cseq->lda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscFunctionReturn(0);
}
1855 
1856 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
1857 {
1858   Mat_MPIDense   *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
1859   Mat_SeqDense   *aseq=(Mat_SeqDense*)(a->A)->data, *bseq=(Mat_SeqDense*)(b->A)->data;
1860   Mat_SeqDense   *cseq=(Mat_SeqDense*)(c->A)->data;
1861   Mat_MatTransMultDense *abt = c->abtdense;
1862   PetscErrorCode ierr;
1863   MPI_Comm       comm;
1864   PetscMPIInt    rank,size;
1865   PetscScalar    *sendbuf, *recvbuf;
1866   PetscInt       i,cK=A->cmap->N,k,j,bn;
1867   PetscScalar    _DOne=1.0,_DZero=0.0;
1868   PetscBLASInt   cm, cn, ck;
1869 
1870   PetscFunctionBegin;
1871   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1872   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1874 
1875   /* copy transpose of B into buf[0] */
1876   bn      = B->rmap->n;
1877   sendbuf = abt->buf[0];
1878   recvbuf = abt->buf[1];
1879   for (k = 0, j = 0; j < bn; j++) {
1880     for (i = 0; i < cK; i++, k++) {
1881       sendbuf[k] = bseq->v[i * bseq->lda + j];
1882     }
1883   }
1884   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
1885   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
1886   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
1887   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
1888   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,aseq->v,&aseq->lda,recvbuf,&ck,&_DZero,cseq->v,&cseq->lda));
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
1893 {
1894   Mat_MPIDense   *c=(Mat_MPIDense*)C->data;
1895   Mat_MatTransMultDense *abt = c->abtdense;
1896   PetscErrorCode ierr;
1897 
1898   PetscFunctionBegin;
1899   switch (abt->alg) {
1900   case 1:
1901     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
1902     break;
1903   default:
1904     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
1905     break;
1906   }
1907   PetscFunctionReturn(0);
1908 }
1909 
/* Driver for C = A*B^T: runs the symbolic phase only on MAT_INITIAL_MATRIX,
   then always performs the numeric product into *C. */
static PetscErrorCode MatMatTransposeMult_MPIDense_MPIDense(Mat A,Mat B, MatReuse scall, PetscReal fill, Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatMatTransposeMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1921 
1922 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
1923 {
1924   PetscErrorCode   ierr;
1925   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
1926   Mat_MatMultDense *ab = a->abdense;
1927 
1928   PetscFunctionBegin;
1929   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
1930   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
1931   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
1932 
1933   ierr = (ab->destroy)(A);CHKERRQ(ierr);
1934   ierr = PetscFree(ab);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: refreshes the cached Elemental copies
   of A and B, multiplies in Elemental format, and converts the result back into
   the existing C (all intermediate matrices are reused from the symbolic phase). */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  /* MAT_REUSE_MATRIX: write the Elemental result back into the caller's C in place */
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1952 
1953 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1954 {
1955   PetscErrorCode   ierr;
1956   Mat              Ae,Be,Ce;
1957   Mat_MPIDense     *c;
1958   Mat_MatMultDense *ab;
1959 
1960   PetscFunctionBegin;
1961   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
1962     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
1963   }
1964 
1965   /* convert A and B to Elemental matrices Ae and Be */
1966   ierr = MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX, &Ae);CHKERRQ(ierr);
1967   ierr = MatConvert(B,MATELEMENTAL,MAT_INITIAL_MATRIX, &Be);CHKERRQ(ierr);
1968 
1969   /* Ce = Ae*Be */
1970   ierr = MatMatMultSymbolic(Ae,Be,fill,&Ce);CHKERRQ(ierr);
1971   ierr = MatMatMultNumeric(Ae,Be,Ce);CHKERRQ(ierr);
1972 
1973   /* convert Ce to C */
1974   ierr = MatConvert(Ce,MATMPIDENSE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
1975 
1976   /* create data structure for reuse Cdense */
1977   ierr = PetscNew(&ab);CHKERRQ(ierr);
1978   c                  = (Mat_MPIDense*)(*C)->data;
1979   c->abdense         = ab;
1980 
1981   ab->Ae             = Ae;
1982   ab->Be             = Be;
1983   ab->Ce             = Ce;
1984   ab->destroy        = (*C)->ops->destroy;
1985   (*C)->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
1986   (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
1987   PetscFunctionReturn(0);
1988 }
1989 
/* Driver for C = A*B via Elemental: the symbolic phase also performs the first
   numeric product, so only one of the two phases runs per call; each is wrapped
   in the corresponding PETSc logging event. */
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic product includes numeric product */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  } else {
    ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2006 #endif
2007 
2008