xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision f26271d2d7c4dd1ad5e4ef1066d481888558d6fe)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
32   if (flg) *B = mat->A;
33   else *B = A;
34   PetscFunctionReturn(0);
35 }
36 
37 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
38 {
39   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
40   PetscErrorCode ierr;
41   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
42 
43   PetscFunctionBegin;
44   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
45   lrow = row - rstart;
46   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
51 {
52   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
53   PetscErrorCode ierr;
54   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
55 
56   PetscFunctionBegin;
57   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
58   lrow = row - rstart;
59   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 
/* Return the square diagonal block of A owned by this process; the block
   shares storage with A (no values are copied) and is cached on A */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      cong;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* reuse a previously created diagonal block if one is cached on A */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases the columns of the local block starting at column rstart;
       NOTE(review): offset m*rstart assumes the leading dimension equals m
       -- confirm for matrices with a placed array of larger lda */
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing takes a reference; drop ours so A solely owns the cache */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
93 
/* Insert/add values into the matrix: locally owned rows go straight into
   the sequential block, off-process rows are stashed for assembly time */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is laid out row by row: row i of the input starts at v+i*n */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-oriented input: entry (i,j) lives at v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: queue it in the stash; communicated in AssemblyBegin/End */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
127 
128 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
129 {
130   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
131   PetscErrorCode ierr;
132   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
133 
134   PetscFunctionBegin;
135   for (i=0; i<m; i++) {
136     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
137     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
138     if (idxm[i] >= rstart && idxm[i] < rend) {
139       row = idxm[i] - rstart;
140       for (j=0; j<n; j++) {
141         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
142         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
143         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
144       }
145     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
146   }
147   PetscFunctionReturn(0);
148 }
149 
150 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
151 {
152   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
153   PetscErrorCode ierr;
154 
155   PetscFunctionBegin;
156   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
157   PetscFunctionReturn(0);
158 }
159 
160 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
161 {
162   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
163   PetscErrorCode ierr;
164 
165   PetscFunctionBegin;
166   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 
170 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
171 {
172   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
173   PetscErrorCode ierr;
174 
175   PetscFunctionBegin;
176   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
181 {
182   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
183   PetscErrorCode ierr;
184 
185   PetscFunctionBegin;
186   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
191 {
192   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
193   PetscErrorCode ierr;
194 
195   PetscFunctionBegin;
196   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
197   PetscFunctionReturn(0);
198 }
199 
/* Extract the submatrix A[isrow, iscol]; the result keeps the same row
   partitioning as A (no parallel redistribution is performed) */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the complete list of requested columns, because
     it owns full rows of the dense matrix */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time;
     columns of the source are lda apart, the destination is filled densely */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* irow holds global indices; shift to local */
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
269 
270 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
271 {
272   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
273   PetscErrorCode ierr;
274 
275   PetscFunctionBegin;
276   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 
280 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
281 {
282   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
283   PetscErrorCode ierr;
284 
285   PetscFunctionBegin;
286   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
291 {
292   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
293   PetscErrorCode ierr;
294   PetscInt       nstash,reallocs;
295 
296   PetscFunctionBegin;
297   if (mdn->donotstash || mat->nooffprocentries) return(0);
298 
299   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
300   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
301   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
/* Finish assembly: drain the stash of off-process entries received from
   other ranks, insert them locally, then assemble the sequential block */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: build the scatter context used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
343 
344 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
345 {
346   PetscErrorCode ierr;
347   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
348 
349   PetscFunctionBegin;
350   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
351   PetscFunctionReturn(0);
352 }
353 
/* Zero the listed (global) rows, optionally placing diag on the diagonal
   and fixing the right-hand side b so that b[row] = diag*x[row] */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows first (diag 0.0 here), then insert the diagonal value */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* MatDiagonalSet with a constant vector writes diag on every zeroed row */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
386 
387 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
388 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
389 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
390 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
391 
/* y = A*x: broadcast the needed entries of x into the local work vector
   via the star forest, then apply the sequential block */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* Begin/End must bracket the communication; arrays stay valid in between */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
409 
/* z = y + A*x: same scatter as MatMult, then a local multiply-add */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* gather the needed entries of x into the local work vector */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
427 
/* y = A^T*x: apply the local transpose first, then sum the partial
   results into y with an SF reduction (y is zeroed beforehand) */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr); /* reduction adds into yy */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
446 
/* z = y + A^T*x: copy y into z, apply the local transpose, then sum the
   partial results into z with an SF reduction */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr); /* reduction adds into zz */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
465 
466 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
467 {
468   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
469   PetscErrorCode    ierr;
470   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
471   PetscScalar       *x,zero = 0.0;
472   const PetscScalar *av;
473 
474   PetscFunctionBegin;
475   ierr = VecSet(v,zero);CHKERRQ(ierr);
476   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
477   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
478   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
479   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
480   radd = A->rmap->rstart*m;
481   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
482   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
483   for (i=0; i<len; i++) {
484     x[i] = av[radd + i*lda + i];
485   }
486   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
487   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
/* Destroy the parallel dense matrix: free the inner objects, the private
   data, and unregister every composed method/type-conversion function */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* release the communication stash, the local block, and the scatter context */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* clear every composed function so the object can be retyped safely */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
531 
532 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
533 
534 #include <petscdraw.h>
/* View helper for ASCII, draw, and socket viewers. Info formats are
   handled per rank; otherwise the whole matrix is gathered onto rank 0
   of a temporary MPIDENSE matrix and viewed there sequentially. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no matrix entries are printed */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* nothing to do for a null draw window */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns every row of the gathered copy; other ranks own none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 actually views; all ranks must call Get/Restore */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
616 
617 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
618 {
619   PetscErrorCode ierr;
620   PetscBool      iascii,isbinary,isdraw,issocket;
621 
622   PetscFunctionBegin;
623   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
624   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
625   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
626   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
627 
628   if (iascii || issocket || isdraw) {
629     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
630   } else if (isbinary) {
631     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
637 {
638   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
639   Mat            mdn  = mat->A;
640   PetscErrorCode ierr;
641   PetscLogDouble isend[5],irecv[5];
642 
643   PetscFunctionBegin;
644   info->block_size = 1.0;
645 
646   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
647 
648   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
649   isend[3] = info->memory;  isend[4] = info->mallocs;
650   if (flag == MAT_LOCAL) {
651     info->nz_used      = isend[0];
652     info->nz_allocated = isend[1];
653     info->nz_unneeded  = isend[2];
654     info->memory       = isend[3];
655     info->mallocs      = isend[4];
656   } else if (flag == MAT_GLOBAL_MAX) {
657     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
658 
659     info->nz_used      = irecv[0];
660     info->nz_allocated = irecv[1];
661     info->nz_unneeded  = irecv[2];
662     info->memory       = irecv[3];
663     info->mallocs      = irecv[4];
664   } else if (flag == MAT_GLOBAL_SUM) {
665     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
666 
667     info->nz_used      = irecv[0];
668     info->nz_allocated = irecv[1];
669     info->nz_unneeded  = irecv[2];
670     info->memory       = irecv[3];
671     info->mallocs      = irecv[4];
672   }
673   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
674   info->fill_ratio_needed = 0;
675   info->factor_mallocs    = 0;
676   PetscFunctionReturn(0);
677 }
678 
/* Set a matrix option; most options are forwarded to the local block,
   some are recorded here, and the rest are deliberately ignored */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* fallthrough: these three are simply forwarded to the local block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* recorded locally because MatSetValues_MPIDense interprets v[] with it */
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  /* fallthrough: meaningless for dense storage */
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* suppresses stashing in MatSetValues_MPIDense */
    a->donotstash = flg;
    break;
  /* fallthrough: structural hints with no effect on dense matrices */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
719 
720 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
721 {
722   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
723   const PetscScalar *l;
724   PetscScalar       x,*v,*vv,*r;
725   PetscErrorCode    ierr;
726   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
727 
728   PetscFunctionBegin;
729   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
730   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
731   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
732   if (ll) {
733     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
734     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
735     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
736     for (i=0; i<m; i++) {
737       x = l[i];
738       v = vv + i;
739       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
740     }
741     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
742     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
743   }
744   if (rr) {
745     const PetscScalar *ar;
746 
747     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
748     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
749     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
750     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
751     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
752     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
753     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
754     for (i=0; i<n; i++) {
755       x = r[i];
756       v = vv + i*lda;
757       for (j=0; j<m; j++) (*v++) *= x;
758     }
759     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
760     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
761   }
762   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
/* Compute a matrix norm. Uniprocessor case delegates to the local block;
   in parallel, Frobenius sums squares globally, 1-norm sums per-column
   absolute values globally, infinity-norm takes the global max row sum.
   NOTE(review): the Frobenius and 1-norm loops walk cmap->n*rmap->n
   contiguous entries of the local array -- this assumes the leading
   dimension equals the local row count; confirm for placed arrays. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  if (mdn->size == 1) {
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared moduli of the local entries, then global sum */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* max over columns of the global column sum of absolute values */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
812 
813 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
814 {
815   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
816   Mat            B;
817   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
818   PetscErrorCode ierr;
819   PetscInt       j,i,lda;
820   PetscScalar    *v;
821 
822   PetscFunctionBegin;
823   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
824     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
825     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
826     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
827     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
828   } else B = *matout;
829 
830   m    = a->A->rmap->n; n = a->A->cmap->n;
831   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
832   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
833   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
834   for (i=0; i<m; i++) rwork[i] = rstart + i;
835   for (j=0; j<n; j++) {
836     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
837     v   += lda;
838   }
839   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
840   ierr = PetscFree(rwork);CHKERRQ(ierr);
841   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
842   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
843   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
844     *matout = B;
845   } else {
846     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
852 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
853 
854 PetscErrorCode MatSetUp_MPIDense(Mat A)
855 {
856   PetscErrorCode ierr;
857 
858   PetscFunctionBegin;
859   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
860   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
861   if (!A->preallocated) {
862     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
868 {
869   PetscErrorCode ierr;
870   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
871 
872   PetscFunctionBegin;
873   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 PetscErrorCode MatConjugate_MPIDense(Mat mat)
878 {
879   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
880   PetscErrorCode ierr;
881 
882   PetscFunctionBegin;
883   ierr = MatConjugate(a->A);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
887 PetscErrorCode MatRealPart_MPIDense(Mat A)
888 {
889   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
890   PetscErrorCode ierr;
891 
892   PetscFunctionBegin;
893   ierr = MatRealPart(a->A);CHKERRQ(ierr);
894   PetscFunctionReturn(0);
895 }
896 
897 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
898 {
899   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
900   PetscErrorCode ierr;
901 
902   PetscFunctionBegin;
903   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
904   PetscFunctionReturn(0);
905 }
906 
907 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
908 {
909   PetscErrorCode ierr;
910   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
911 
912   PetscFunctionBegin;
913   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
914   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
915   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
918 
919 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
920 
921 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
922 {
923   PetscErrorCode ierr;
924   PetscInt       i,n;
925   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
926   PetscReal      *work;
927 
928   PetscFunctionBegin;
929   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
930   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
931   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
932   if (type == NORM_2) {
933     for (i=0; i<n; i++) work[i] *= work[i];
934   }
935   if (type == NORM_INFINITY) {
936     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
937   } else {
938     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
939   }
940   ierr = PetscFree(work);CHKERRQ(ierr);
941   if (type == NORM_2) {
942     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
943   }
944   PetscFunctionReturn(0);
945 }
946 
947 #if defined(PETSC_HAVE_CUDA)
948 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
949 {
950   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
951   PetscErrorCode ierr;
952 
953   PetscFunctionBegin;
954   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
959 {
960   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
961   PetscErrorCode ierr;
962 
963   PetscFunctionBegin;
964   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
969 {
970   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
979 {
980   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
981   PetscErrorCode ierr;
982 
983   PetscFunctionBegin;
984   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
985   PetscFunctionReturn(0);
986 }
987 
988 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
989 {
990   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
991   PetscErrorCode ierr;
992 
993   PetscFunctionBegin;
994   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
999 {
1000   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1001   PetscErrorCode ierr;
1002 
1003   PetscFunctionBegin;
1004   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1009 {
1010   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1019 {
1020   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1029 {
1030   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1031   PetscErrorCode ierr;
1032 
1033   PetscFunctionBegin;
1034   if (d->A) {
1035     ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
1036   }
1037   mat->boundtocpu = bind;
1038   PetscFunctionReturn(0);
1039 }
1040 
/*
   MatMPIDenseCUDASetPreallocation - Allocates (or wraps) the device storage
   for the local block of a MATMPIDENSECUDA matrix.

   Input Parameters:
+  A      - the matrix; a silent no-op if A is not of type MATMPIDENSECUDA
-  d_data - optional device array to use as storage, or NULL to let PETSc allocate
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    /* the local block holds all N global columns of the owned rows */
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1062 #endif
1063 
1064 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1065 {
1066   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1067   PetscErrorCode ierr;
1068 
1069   PetscFunctionBegin;
1070   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1075 
/* A dense matrix stores every entry, so no diagonal entry is ever missing.
   NOTE(review): *d is intentionally left unset -- it is only meaningful
   when *missing is PETSC_TRUE. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1082 
1083 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1084 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1085 
1086 /* -------------------------------------------------------------------*/
/*
   Operation table for MATMPIDENSE.  Slot positions follow the ordering of
   struct _MatOps (see petsc/private/matimpl.h); a 0 entry means the
   operation is not implemented for this matrix type.  The numeric comments
   are slot indices kept as navigation aids.
*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1236 
1237 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1238 {
1239   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1240   PetscBool      iscuda;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1245   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1246   if (!a->A) {
1247     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1248     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1249     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1250   }
1251   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1252   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1253   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1254   mat->preallocated = PETSC_TRUE;
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to the
   MATELEMENTAL format by copying all local entries through MatSetValues.
   For MAT_INPLACE_MATRIX the new matrix replaces A's header in place.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* the dense array is stored column-major */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* global indices: every column, and the locally owned rows */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* destroy A's contents and take over the new matrix's header */
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1297 #endif
1298 
1299 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1300 {
1301   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1302   PetscErrorCode ierr;
1303 
1304   PetscFunctionBegin;
1305   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1310 {
1311   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
/*
   MatCreateMPIMatConcatenateSeqMat_MPIDense - Stacks each rank's sequential
   dense matrix into one parallel dense matrix on comm.

   Input Parameters:
+  comm  - the communicator of the result
.  inmat - this rank's sequential dense contribution
.  n     - local column count of the result, or PETSC_DECIDE
-  scall - MAT_INITIAL_MATRIX (create *outmat) or MAT_REUSE_MATRIX (refill it)
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase: copy this rank's block into the local part of *outmat */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1348 
1349 #if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix back
   to a host MATMPIDENSE: unregisters the CUDA-specific composed functions,
   restores the host vector type, and converts the local block to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX falls through: B aliases M and is converted in place */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-only composed methods installed by the reverse conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block to the host type and rebuild lvec/Mvctx */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1388 
/*
   MatConvert_MPIDense_MPIDenseCUDA - Converts a host MATMPIDENSE matrix to
   MATMPIDENSECUDA: installs the CUDA array-access composed functions, sets
   the CUDA vector type, and converts the local block to MATSEQDENSECUDA.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX falls through: B aliases M and is converted in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register the CUDA-specific methods looked up by the MatDenseCUDA* API */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",            MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                        MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                    MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                   MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                    MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",               MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                      MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                      MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block to CUDA and rebuild lvec/Mvctx */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1430 #endif
1431 
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE: allocates the
   Mat_MPIDense data, installs the operation table, creates the stash used
   for off-rank MatSetValues, and composes the type-specific methods.
   Storage itself is allocated later by MatMPIDenseSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* compose the methods dispatched by the MatDense* public API */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1484 
1485 /*MC
1486    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1487 
1488    Options Database Keys:
1489 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1490 
1491   Level: beginner
1492 
.seealso: MATMPIDENSE, MATSEQDENSECUDA
1494 
1495 M*/
1496 #if defined(PETSC_HAVE_CUDA)
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* build a plain MPIDENSE matrix first, then convert it in place to the CUDA type */
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1506 #endif
1507 
1508 /*MC
1509    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1510 
1511    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1512    and MATMPIDENSE otherwise.
1513 
1514    Options Database Keys:
1515 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1516 
1517   Level: beginner
1518 
1519 
1520 .seealso: MATSEQDENSE,MATMPIDENSE
1521 M*/
1522 
1523 /*@C
1524    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1525 
1526    Not collective
1527 
1528    Input Parameters:
+  B - the matrix
1530 -  data - optional location of matrix data.  Set data=NULL for PETSc
1531    to control all matrix memory allocation.
1532 
1533    Notes:
1534    The dense format is fully compatible with standard Fortran 77
1535    storage by columns.
1536 
1537    The data input variable is intended primarily for Fortran programmers
1538    who wish to allocate their own matrix memory space.  Most users should
1539    set data=NULL.
1540 
1541    Level: intermediate
1542 
1543 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1544 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the implementation composed on B; PetscTryMethod() is a
     no-op when B is not a type that provides "MatMPIDenseSetPreallocation_C" */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1553 
1554 /*@
1555    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1556    array provided by the user. This is useful to avoid copying an array
1557    into a matrix
1558 
1559    Not Collective
1560 
1561    Input Parameters:
1562 +  mat - the matrix
1563 -  array - the array in column major order
1564 
1565    Notes:
1566    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1567    freed when the matrix is destroyed.
1568 
1569    Level: developer
1570 
1571 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1572 
1573 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values have (potentially) changed, so invalidate any cached information */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array is host memory, so the authoritative copy of the data is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1586 
1587 /*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
1589 
1590    Not Collective
1591 
1592    Input Parameters:
1593 .  mat - the matrix
1594 
1595    Notes:
1596    You can only call this after a call to MatDensePlaceArray()
1597 
1598    Level: developer
1599 
1600 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1601 
1602 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix again refers to its original storage; bump the state so cached data is refreshed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1612 
1613 #if defined(PETSC_HAVE_CUDA)
1614 /*@C
1615    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
1616    array provided by the user. This is useful to avoid copying an array
1617    into a matrix
1618 
1619    Not Collective
1620 
1621    Input Parameters:
1622 +  mat - the matrix
1623 -  array - the array in column major order
1624 
1625    Notes:
1626    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
1627    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
1628 
1629    Level: developer
1630 
1631 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
1632 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values have (potentially) changed, so invalidate any cached information */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array is device memory, so the authoritative copy of the data is now on the GPU */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
1643 
1644 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
1646 
1647    Not Collective
1648 
1649    Input Parameters:
1650 .  mat - the matrix
1651 
1652    Notes:
1653    You can only call this after a call to MatDenseCUDAPlaceArray()
1654 
1655    Level: developer
1656 
1657 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
1658 
1659 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix again refers to its original storage; bump the state so cached data is refreshed */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1669 
1670 /*@C
1671    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
1672 
1673    Not Collective
1674 
1675    Input Parameters:
1676 .  A - the matrix
1677 
   Output Parameter:
1679 .  array - the GPU array in column major order
1680 
1681    Notes:
1682    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
1683 
1684    Level: developer
1685 
1686 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
1687 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* write access: the caller is expected to modify the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1697 
1698 /*@C
1699    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
1700 
1701    Not Collective
1702 
1703    Input Parameters:
1704 +  A - the matrix
1705 -  array - the GPU array in column major order
1706 
1707    Notes:
1708 
1709    Level: developer
1710 
1711 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
1712 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the writes happened on the device, so the authoritative copy of the data is on the GPU */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
1723 
1724 /*@C
1725    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
1726 
1727    Not Collective
1728 
1729    Input Parameters:
1730 .  A - the matrix
1731 
   Output Parameter:
1733 .  array - the GPU array in column major order
1734 
1735    Notes:
1736    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
1737 
1738    Level: developer
1739 
1740 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
1741 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* read-only access: the values cannot change, so no PetscObjectStateIncrease() is needed */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1750 
1751 /*@C
1752    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
1753 
1754    Not Collective
1755 
1756    Input Parameters:
1757 +  A - the matrix
1758 -  array - the GPU array in column major order
1759 
1760    Notes:
1761    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
1762 
1763    Level: developer
1764 
1765 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
1766 @*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* read-only access was granted, so neither state nor offload mask needs updating */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1775 
1776 /*@C
1777    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
1778 
1779    Not Collective
1780 
1781    Input Parameters:
1782 .  A - the matrix
1783 
   Output Parameter:
1785 .  array - the GPU array in column major order
1786 
1787    Notes:
1788    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
1789 
1790    Level: developer
1791 
1792 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
1793 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read-write access: assume the caller modifies the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1803 
1804 /*@C
1805    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
1806 
1807    Not Collective
1808 
1809    Input Parameters:
1810 +  A - the matrix
1811 -  array - the GPU array in column major order
1812 
1813    Notes:
1814 
1815    Level: developer
1816 
1817 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
1818 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* any modifications happened on the device, so the authoritative copy of the data is on the GPU */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
1829 #endif
1830 
1831 /*@C
1832    MatCreateDense - Creates a matrix in dense format.
1833 
1834    Collective
1835 
1836    Input Parameters:
1837 +  comm - MPI communicator
1838 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1839 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1840 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1841 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1842 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1843    to control all matrix memory allocation.
1844 
1845    Output Parameter:
1846 .  A - the matrix
1847 
1848    Notes:
1849    The dense format is fully compatible with standard Fortran 77
1850    storage by columns.
1851 
1852    The data input variable is intended primarily for Fortran programmers
1853    who wish to allocate their own matrix memory space.  Most users should
1854    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
1855 
1856    The user MUST specify either the local or global matrix dimensions
1857    (possibly both).
1858 
1859    Level: intermediate
1860 
1861 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1862 @*/
1863 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1864 {
1865   PetscErrorCode ierr;
1866   PetscMPIInt    size;
1867 
1868   PetscFunctionBegin;
1869   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1870   PetscValidLogicalCollectiveBool(*A,!!data,6);
1871   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1873   if (size > 1) {
1874     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
1875     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1876     if (data) {  /* user provided data array, so no need to assemble */
1877       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
1878       (*A)->assembled = PETSC_TRUE;
1879     }
1880   } else {
1881     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
1882     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
1883   }
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 #if defined(PETSC_HAVE_CUDA)
1888 /*@C
1889    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
1890 
1891    Collective
1892 
1893    Input Parameters:
1894 +  comm - MPI communicator
1895 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1896 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1897 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1898 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1899 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
1900    to control matrix memory allocation.
1901 
1902    Output Parameter:
1903 .  A - the matrix
1904 
1905    Notes:
1906 
1907    Level: intermediate
1908 
1909 .seealso: MatCreate(), MatCreateDense()
1910 @*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user array was supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    /* uniprocessor: a sequential CUDA dense matrix suffices */
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1934 #endif
1935 
1936 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1937 {
1938   Mat            mat;
1939   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
1940   PetscErrorCode ierr;
1941 
1942   PetscFunctionBegin;
1943   *newmat = 0;
1944   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
1945   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1946   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
1947   a       = (Mat_MPIDense*)mat->data;
1948 
1949   mat->factortype   = A->factortype;
1950   mat->assembled    = PETSC_TRUE;
1951   mat->preallocated = PETSC_TRUE;
1952 
1953   a->size         = oldmat->size;
1954   a->rank         = oldmat->rank;
1955   mat->insertmode = NOT_SET_VALUES;
1956   a->donotstash   = oldmat->donotstash;
1957 
1958   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
1959   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
1960 
1961   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1962   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1963   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
1964 
1965   *newmat = mat;
1966   PetscFunctionReturn(0);
1967 }
1968 
/* Loads an MPIDENSE matrix from a viewer; binary viewers are supported always,
   HDF5 viewers only when PETSc was configured with HDF5 */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
1995 
1996 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
1997 {
1998   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1999   Mat            a,b;
2000   PetscBool      flg;
2001   PetscErrorCode ierr;
2002 
2003   PetscFunctionBegin;
2004   a    = matA->A;
2005   b    = matB->A;
2006   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2007   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2008   PetscFunctionReturn(0);
2009 }
2010 
/* Destroy callback installed on C by MatTransposeMatMultSymbolic_MPIDense_MPIDense():
   frees the A^T*B reuse data, then chains to the original destroy routine */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_TransMatMultDense *atb = a->atbdense;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  /* chain to the destroy routine that was in place before the product was set up */
  ierr = (*atb->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2024 
/* Destroy callback installed on C by MatMatTransposeMultSymbolic_MPIDense_MPIDense():
   frees the A*B^T reuse buffers, then chains to the original destroy routine */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
{
  PetscErrorCode        ierr;
  Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
  Mat_MatTransMultDense *abt = a->abtdense;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  /* recvcounts/recvdispls are only allocated for the "allgatherv" algorithm;
     PetscFree2() accepts NULL pointers */
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  /* chain to the destroy routine that was in place before the product was set up */
  ierr = (abt->destroy)(A);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2038 
/* Numeric phase of C = A^T * B for MPIDENSE A and B: each rank computes the full-size
   local product aseq^T * bseq, then the per-rank contributions are summed and each rank
   receives its owned rows of C via MPI_Reduce_scatter() */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq (a cM x cN sequential matrix; reused on repeat calls) */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* rank p receives the rows of C it owns: (rows owned by p) * (global number of columns) values */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: one contiguous block per destination rank,
     column-major (j outer, i inner) within each block */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2076 
/* Symbolic phase of C = A^T * B for MPIDENSE A and B: shapes and assembles C,
   and allocates the send buffer and receive counts reused by every numeric call.
   The fill argument is accepted for interface conformance but unused for dense products. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to have identical row ownership on every rank */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C: local size (cols of A) x (local cols of B) */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C: sendbuf holds the full cM x cN local product */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  /* hook a custom destroy on C so the reuse data is freed with the matrix */
  c               = (Mat_MPIDense*)C->data;
  c->atbdense     = atb;
  atb->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2111 
/* Symbolic phase of C = A * B^T for MPIDENSE A and B: shapes and assembles C, selects
   the communication algorithm ("allgatherv" or "cyclic") from the product options, and
   allocates the buffers reused by each numeric call.
   The fill argument is accepted for interface conformance but unused for dense products. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", alg 1 = "cyclic" (any other string selects cyclic) */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag keeps the cyclic point-to-point messages from colliding with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers, each large enough for any rank's rows of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds the local B^T, buf[1] the fully gathered B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  /* hook a custom destroy on C so the reuse data is freed with the matrix */
  c                               = (Mat_MPIDense*)C->data;
  c->abtdense                     = abt;
  abt->destroy                    = C->ops->destroy;
  C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2167 
/* Numeric phase of C = A * B^T using the "cyclic" algorithm: the local rows of B are
   passed around the ring of ranks; at each step every rank multiplies its local A block
   against the B rows currently in hand, overlapping the next communication with the GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  /* convert the three leading dimensions to BLAS integer width */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* B's local array is already contiguous; use it directly as the first send buffer */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact B's local rows into buf[0], removing the lda padding */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring topology: send to the previous rank, receive from the next */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by the originating rank */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2248 
2249 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2250 {
2251   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2252   Mat_MatTransMultDense *abt = c->abtdense;
2253   PetscErrorCode        ierr;
2254   MPI_Comm              comm;
2255   PetscMPIInt           size;
2256   PetscScalar           *cv, *sendbuf, *recvbuf;
2257   const PetscScalar     *av,*bv;
2258   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2259   PetscScalar           _DOne=1.0,_DZero=0.0;
2260   PetscBLASInt          cm, cn, ck, alda, clda;
2261 
2262   PetscFunctionBegin;
2263   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2264   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2265   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2266   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2267   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2268   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2269   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2270   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2271   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2272   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2273   /* copy transpose of B into buf[0] */
2274   bn      = B->rmap->n;
2275   sendbuf = abt->buf[0];
2276   recvbuf = abt->buf[1];
2277   for (k = 0, j = 0; j < bn; j++) {
2278     for (i = 0; i < cK; i++, k++) {
2279       sendbuf[k] = bv[i * blda + j];
2280     }
2281   }
2282   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2283   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2284   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2285   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2286   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2287   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2288   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2289   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2290   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2295 {
2296   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
2297   Mat_MatTransMultDense *abt = c->abtdense;
2298   PetscErrorCode        ierr;
2299 
2300   PetscFunctionBegin;
2301   switch (abt->alg) {
2302   case 1:
2303     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2304     break;
2305   default:
2306     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2307     break;
2308   }
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2313 {
2314   PetscErrorCode   ierr;
2315   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2316   Mat_MatMultDense *ab = a->abdense;
2317 
2318   PetscFunctionBegin;
2319   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2320   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2321   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2322 
2323   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2324   ierr = PetscFree(ab);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #if defined(PETSC_HAVE_ELEMENTAL)
2329 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2330 {
2331   PetscErrorCode   ierr;
2332   Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
2333   Mat_MatMultDense *ab=c->abdense;
2334 
2335   PetscFunctionBegin;
2336   ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
2337   ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
2338   ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
2339   ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2344 {
2345   PetscErrorCode   ierr;
2346   Mat              Ae,Be,Ce;
2347   Mat_MPIDense     *c;
2348   Mat_MatMultDense *ab;
2349 
2350   PetscFunctionBegin;
2351   /* check local size of A and B */
2352   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2353     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2354   }
2355 
2356   /* create elemental matrices Ae and Be */
2357   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2358   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2359   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2360   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2361   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2362 
2363   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2364   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2365   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2366   ierr = MatSetUp(Be);CHKERRQ(ierr);
2367   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2368 
2369   /* compute symbolic Ce = Ae*Be */
2370   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2371   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2372 
2373   /* setup C */
2374   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2375   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2376   ierr = MatSetUp(C);CHKERRQ(ierr);
2377 
2378   /* create data structure for reuse Cdense */
2379   ierr = PetscNew(&ab);CHKERRQ(ierr);
2380   c                  = (Mat_MPIDense*)C->data;
2381   c->abdense         = ab;
2382 
2383   ab->Ae             = Ae;
2384   ab->Be             = Be;
2385   ab->Ce             = Ce;
2386   ab->destroy        = C->ops->destroy;
2387   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2388   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2389   C->ops->productnumeric = MatProductNumeric_AB;
2390   PetscFunctionReturn(0);
2391 }
2392 #endif
2393 /* ----------------------------------------------- */
2394 #if defined(PETSC_HAVE_ELEMENTAL)
2395 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2396 {
2397   PetscFunctionBegin;
2398   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2399   C->ops->productsymbolic = MatProductSymbolic_AB;
2400   C->ops->productnumeric  = MatProductNumeric_AB;
2401   PetscFunctionReturn(0);
2402 }
2403 #endif
2404 
2405 static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
2406 {
2407   PetscErrorCode ierr;
2408   Mat_Product    *product = C->product;
2409 
2410   PetscFunctionBegin;
2411   ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
2412   C->ops->productnumeric          = MatProductNumeric_AtB;
2413   C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2418 {
2419   Mat_Product *product = C->product;
2420   Mat         A = product->A,B=product->B;
2421 
2422   PetscFunctionBegin;
2423   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2424     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2425 
2426   C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2431 {
2432   PetscErrorCode ierr;
2433   Mat_Product    *product = C->product;
2434   const char     *algTypes[2] = {"allgatherv","cyclic"};
2435   PetscInt       alg,nalg = 2;
2436   PetscBool      flg = PETSC_FALSE;
2437 
2438   PetscFunctionBegin;
2439   /* Set default algorithm */
2440   alg = 0; /* default is allgatherv */
2441   ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
2442   if (flg) {
2443     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2444   }
2445 
2446   /* Get runtime option */
2447   if (product->api_user) {
2448     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2449     ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2450     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2451   } else {
2452     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2453     ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
2454     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2455   }
2456   if (flg) {
2457     ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
2458   }
2459 
2460   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2461   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2462   C->ops->productnumeric           = MatProductNumeric_ABt;
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2467 {
2468   PetscErrorCode ierr;
2469   Mat_Product    *product = C->product;
2470 
2471   PetscFunctionBegin;
2472   switch (product->type) {
2473 #if defined(PETSC_HAVE_ELEMENTAL)
2474   case MATPRODUCT_AB:
2475     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2476     break;
2477 #endif
2478   case MATPRODUCT_AtB:
2479     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2480     break;
2481   case MATPRODUCT_ABt:
2482     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2483     break;
2484   default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
2485   }
2486   PetscFunctionReturn(0);
2487 }
2488