xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 5ea7661a5c6ebfe94618efd9ae12232cbd7a6c1a)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Return the on-process diagonal block of A as a sequential dense matrix.
   The block is created lazily on first call, cached on A under the key
   "DiagonalBlock", and aliases A's storage (no copy is made). The returned
   *a is a borrowed reference owned by the cache; callers must not destroy it. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* point B at the diagonal sub-block of the local array; assumes the local
       block is stored with LDA == m so the diagonal block starts at column
       rstart, offset m*rstart — NOTE(review): verify against MatDenseSetLDA use */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* the compose takes its own reference, so dropping ours below keeps B alive */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Insert/add a logically dense m-by-n block of values.
   Locally owned rows go straight into the sequential block; off-process rows
   are stashed for communication during assembly (unless donotstash is set).
   With roworiented, v is row-major (row i starts at v+i*n); otherwise v is
   column-major (entry (i,j) is at v+i+j*m). Negative indices are skipped. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* whole row in one call: row i of v is contiguous */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* column-major input: entries of row i are strided by m */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: queue it; assembly will ship it to its owner */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
166 {
167   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168   PetscErrorCode ierr;
169 
170   PetscFunctionBegin;
171   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174 
175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
176 {
177   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
186 {
187   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
196 {
197   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
198   PetscErrorCode ierr;
199 
200   PetscFunctionBegin;
201   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
206 {
207   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
212   if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
213   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
214   PetscFunctionReturn(0);
215 }
216 
217 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
218 {
219   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
220   PetscErrorCode ierr;
221 
222   PetscFunctionBegin;
223   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
224   if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
225   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
230 {
231   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
232   PetscErrorCode ierr;
233 
234   PetscFunctionBegin;
235   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
236   if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
237   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
/* Extract the submatrix A(isrow,iscol). The column IS is gathered onto every
   process, so each rank copies its locally owned selected rows for ALL
   selected columns. Row indices in isrow must be locally owned (same row
   partitioning as A); no parallel redistribution is performed. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): bv is advanced linearly, assuming the new local block is
     packed (LDA == nrows) — confirm when newmat is reused with a custom LDA */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* irow holds global indices; shift to local */
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
310 
311 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
312 {
313   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
322 {
323   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
324   PetscErrorCode ierr;
325 
326   PetscFunctionBegin;
327   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
332 {
333   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
334   PetscErrorCode ierr;
335 
336   PetscFunctionBegin;
337   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 
341 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
342 {
343   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
344   PetscErrorCode ierr;
345   PetscInt       nstash,reallocs;
346 
347   PetscFunctionBegin;
348   if (mdn->donotstash || mat->nooffprocentries) return(0);
349 
350   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
351   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
352   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
353   PetscFunctionReturn(0);
354 }
355 
/* Finish assembly: drain the stash messages started in MatAssemblyBegin,
   insert the received entries (now all locally owned), then assemble the
   inner sequential block and, on first final assembly, build the scatter
   machinery used by MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 means no further messages are pending */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up lvec/Mvctx for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
394 
395 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
396 {
397   PetscErrorCode ierr;
398   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
399 
400   PetscFunctionBegin;
401   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
402   PetscFunctionReturn(0);
403 }
404 
/* Zero the listed (global) rows, optionally placing diag on the diagonal of
   the zeroed rows and fixing the right-hand side b to diag*x there.
   Each rank handles only the rows it owns. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the local rows with no diagonal; diagonal handled below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this sets EVERY diagonal entry to diag, not only those of
       the zeroed rows — confirm this matches the intended MatZeroRows
       contract for dense matrices */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
437 
438 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
439 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
440 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
441 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
442 
443 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
444 {
445   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
446   PetscErrorCode    ierr;
447   const PetscScalar *ax;
448   PetscScalar       *ay;
449 
450   PetscFunctionBegin;
451   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
452   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
453   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
454   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
455   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
456   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
457   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
462 {
463   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
464   PetscErrorCode    ierr;
465   const PetscScalar *ax;
466   PetscScalar       *ay;
467 
468   PetscFunctionBegin;
469   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
470   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
471   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
472   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
473   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
474   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
475   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
/* yy = A^T*xx: apply the local transpose into the work vector lvec, then
   sum-reduce the partial results back onto the distributed output. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* output must start at zero because the SF reduce adds into it */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
497 
/* zz = yy + A^T*xx: seed the output with yy, then sum-reduce the local
   transpose product on top of it. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* zz starts as yy; the SF reduce below adds A^T*xx into it */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
516 
517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
518 {
519   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
520   PetscErrorCode    ierr;
521   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
522   PetscScalar       *x,zero = 0.0;
523   const PetscScalar *av;
524 
525   PetscFunctionBegin;
526   ierr = VecSet(v,zero);CHKERRQ(ierr);
527   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
528   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
529   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
530   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
531   radd = A->rmap->rstart*m;
532   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
533   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
534   for (i=0; i<len; i++) {
535     x[i] = av[radd + i*lda + i];
536   }
537   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
538   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
/* Destroy an MPI dense matrix: release the inner objects, free the private
   data, and unregister every composed method so a subsequent MatSetType()
   starts from a clean object. Errors out if a column-vector or submatrix
   view is still outstanding. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* inner sequential block, work vector, and communication objects */
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove all type-specific method implementations composed on this object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
596 
597 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
598 
599 #include <petscdraw.h>
/* View helper for ASCII, draw, and socket viewers. INFO formats are printed
   in place; otherwise the whole matrix is gathered onto rank 0 (via a
   temporary MPIDENSE matrix with all rows on rank 0) and viewed there. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no matrix entries */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* rank 0 owns all rows of the temporary matrix; others own none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      /* view the gathered sequential block under the original matrix's name */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
681 
682 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
683 {
684   PetscErrorCode ierr;
685   PetscBool      iascii,isbinary,isdraw,issocket;
686 
687   PetscFunctionBegin;
688   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
689   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
690   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
691   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
692 
693   if (iascii || issocket || isdraw) {
694     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
695   } else if (isbinary) {
696     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
702 {
703   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
704   Mat            mdn  = mat->A;
705   PetscErrorCode ierr;
706   PetscLogDouble isend[5],irecv[5];
707 
708   PetscFunctionBegin;
709   info->block_size = 1.0;
710 
711   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
712 
713   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
714   isend[3] = info->memory;  isend[4] = info->mallocs;
715   if (flag == MAT_LOCAL) {
716     info->nz_used      = isend[0];
717     info->nz_allocated = isend[1];
718     info->nz_unneeded  = isend[2];
719     info->memory       = isend[3];
720     info->mallocs      = isend[4];
721   } else if (flag == MAT_GLOBAL_MAX) {
722     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
723 
724     info->nz_used      = irecv[0];
725     info->nz_allocated = irecv[1];
726     info->nz_unneeded  = irecv[2];
727     info->memory       = irecv[3];
728     info->mallocs      = irecv[4];
729   } else if (flag == MAT_GLOBAL_SUM) {
730     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
731 
732     info->nz_used      = irecv[0];
733     info->nz_allocated = irecv[1];
734     info->nz_unneeded  = irecv[2];
735     info->memory       = irecv[3];
736     info->mallocs      = irecv[4];
737   }
738   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
739   info->fill_ratio_needed = 0;
740   info->factor_mallocs    = 0;
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
745 {
746   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750   switch (op) {
751   case MAT_NEW_NONZERO_LOCATIONS:
752   case MAT_NEW_NONZERO_LOCATION_ERR:
753   case MAT_NEW_NONZERO_ALLOCATION_ERR:
754     MatCheckPreallocated(A,1);
755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
756     break;
757   case MAT_ROW_ORIENTED:
758     MatCheckPreallocated(A,1);
759     a->roworiented = flg;
760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
761     break;
762   case MAT_NEW_DIAGONALS:
763   case MAT_KEEP_NONZERO_PATTERN:
764   case MAT_USE_HASH_TABLE:
765   case MAT_SORTED_FULL:
766     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
767     break;
768   case MAT_IGNORE_OFF_PROC_ENTRIES:
769     a->donotstash = flg;
770     break;
771   case MAT_SYMMETRIC:
772   case MAT_STRUCTURALLY_SYMMETRIC:
773   case MAT_HERMITIAN:
774   case MAT_SYMMETRY_ETERNAL:
775   case MAT_IGNORE_LOWER_TRIANGULAR:
776   case MAT_IGNORE_ZERO_ENTRIES:
777     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
778     break;
779   default:
780     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
781   }
782   PetscFunctionReturn(0);
783 }
784 
785 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
786 {
787   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
788   const PetscScalar *l;
789   PetscScalar       x,*v,*vv,*r;
790   PetscErrorCode    ierr;
791   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
792 
793   PetscFunctionBegin;
794   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
795   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
796   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
797   if (ll) {
798     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
799     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
800     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
801     for (i=0; i<m; i++) {
802       x = l[i];
803       v = vv + i;
804       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
805     }
806     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
807     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
808   }
809   if (rr) {
810     const PetscScalar *ar;
811 
812     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
813     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
814     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
815     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
816     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
817     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
818     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
819     for (i=0; i<n; i++) {
820       x = r[i];
821       v = vv + i*lda;
822       for (j=0; j<m; j++) (*v++) *= x;
823     }
824     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
825     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
826   }
827   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
828   PetscFunctionReturn(0);
829 }
830 
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  /* Compute a matrix norm of the parallel dense matrix: each process owns a
     contiguous block of rows (stored in mdn->A, which holds all global
     columns); partial results are combined with an MPI reduction. */
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  if (mdn->size == 1) {
    /* uniprocess case: defer entirely to the sequential implementation */
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): walks m*n entries contiguously -- assumes the local
         block is tightly packed (lda == local row count); confirm against
         MatDenseGetLDA() as done in MatDiagonalScale_MPIDense() */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      /* sum |a_ij|^2 over all processes, then take the square root */
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* 1-norm: max over columns of the column's absolute sum; each rank
         accumulates its rows' contribution for every global column, then
         the per-column sums are reduced across ranks */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so the global norm is simply
         the maximum of the per-process row norms */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
877 
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  /* Build the transpose by scattering this process's rows into the target
     matrix with MatSetValues(); assembly routes off-process values. */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* for in-place transposition a temporary B is built, then merged into A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* rwork[] = global indices of the locally owned rows; they become the
     column indices of the transpose */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the local block contributes to global row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  /* NOTE(review): v has been advanced n*lda entries past the start of the
     array when it is restored here -- confirm the restore routine does not
     validate the pointer value */
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B, destroying B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
915 
916 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
917 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
918 
919 PetscErrorCode MatSetUp_MPIDense(Mat A)
920 {
921   PetscErrorCode ierr;
922 
923   PetscFunctionBegin;
924   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
925   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
926   if (!A->preallocated) {
927     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
933 {
934   PetscErrorCode ierr;
935   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
936 
937   PetscFunctionBegin;
938   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
939   PetscFunctionReturn(0);
940 }
941 
942 PetscErrorCode MatConjugate_MPIDense(Mat mat)
943 {
944   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
945   PetscErrorCode ierr;
946 
947   PetscFunctionBegin;
948   ierr = MatConjugate(a->A);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatRealPart_MPIDense(Mat A)
953 {
954   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
955   PetscErrorCode ierr;
956 
957   PetscFunctionBegin;
958   ierr = MatRealPart(a->A);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
963 {
964   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
973 {
974   PetscErrorCode ierr;
975   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
976 
977   PetscFunctionBegin;
978   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
979   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
980   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
985 
986 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
987 {
988   PetscErrorCode ierr;
989   PetscInt       i,n;
990   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
991   PetscReal      *work;
992 
993   PetscFunctionBegin;
994   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
995   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
996   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
997   if (type == NORM_2) {
998     for (i=0; i<n; i++) work[i] *= work[i];
999   }
1000   if (type == NORM_INFINITY) {
1001     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1002   } else {
1003     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1004   }
1005   ierr = PetscFree(work);CHKERRQ(ierr);
1006   if (type == NORM_2) {
1007     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #if defined(PETSC_HAVE_CUDA)
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* Expose column col as a parallel CUDA vector without copying: a->cvec is
     a reusable wrapper whose device pointer is placed over the column. */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created lazily with a NULL array; actual storage is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that col == 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts col*lda entries into the device array */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1032 
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Undo MatDenseGetColumnVec_MPIDenseCUDA(): give the device array back to
     the local matrix and detach it from the wrapper vector. */
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;  /* the caller's reference is invalidated */
  PetscFunctionReturn(0);
}
1047 
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA(): same zero-copy
     column view, but the array is taken in read mode and the wrapper vector
     is read-locked. */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created lazily with a NULL array; actual storage is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that col == 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1068 
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Undo MatDenseGetColumnVecRead_MPIDenseCUDA(): return the device array,
     drop the read lock, and detach the wrapper vector. */
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;  /* the caller's reference is invalidated */
  PetscFunctionReturn(0);
}
1084 
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  /* Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA(): same zero-copy
     column view, but the device array is taken in write mode. */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created lazily with a NULL array; actual storage is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* +1 so that col == 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1104 
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Undo MatDenseGetColumnVecWrite_MPIDenseCUDA(): return the device array
     (write mode) and detach the wrapper vector. */
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;  /* the caller's reference is invalidated */
  PetscFunctionReturn(0);
}
1119 
static PetscErrorCode MatDenseGetSubMatrix_MPIDenseCUDA(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Expose columns [cbegin,cend) as a matrix that shares A's device storage
     (zero copy); a->cmat caches the wrapper between calls. */
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (a->cmat && cend-cbegin != a->cmat->cmap->N) {
    /* cached wrapper has the wrong column count; rebuild it below */
    ierr = MatDestroy(&a->cmat);CHKERRQ(ierr);
  }
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  if (!a->cmat) {
    /* first use (or size change): create the wrapper over the window and
       give it A's leading dimension so column strides line up */
    ierr = MatCreateDenseCUDA(PetscObjectComm((PetscObject)A),A->rmap->n,PETSC_DECIDE,A->rmap->N,cend-cbegin,(PetscScalar*)a->ptrinuse + (size_t)cbegin * (size_t)lda,&a->cmat);CHKERRQ(ierr);
    ierr = MatDenseSetLDA(a->cmat,lda);CHKERRQ(ierr);
  } else {
    /* reuse the cached wrapper; just repoint its data at the window */
    ierr = MatDenseCUDAPlaceArray(a->cmat,a->ptrinuse + (size_t)cbegin * (size_t)lda);CHKERRQ(ierr);
  }
  a->matinuse = cbegin + 1;  /* +1 so that cbegin == 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1144 
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDenseCUDA(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Undo MatDenseGetSubMatrix_MPIDenseCUDA(): return the device array and
     detach the cached column-window matrix. */
  if (!a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column matrix");
  a->matinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = MatDenseCUDAResetArray(a->cmat);CHKERRQ(ierr);
  *v   = NULL;  /* the caller's reference is invalidated */
  PetscFunctionReturn(0);
}
1159 
1160 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1161 {
1162   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1163   PetscErrorCode ierr;
1164 
1165   PetscFunctionBegin;
1166   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1167   if (l->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1168   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1173 {
1174   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1179   if (l->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1180   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1185 {
1186   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1191   if (l->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1192   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1197 {
1198   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1207 {
1208   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1209   PetscErrorCode ierr;
1210 
1211   PetscFunctionBegin;
1212   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1217 {
1218   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1219   PetscErrorCode ierr;
1220 
1221   PetscFunctionBegin;
1222   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1227 {
1228   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1229   PetscErrorCode ierr;
1230 
1231   PetscFunctionBegin;
1232   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1237 {
1238   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1247 {
1248   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1257 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1258 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1259 static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat,PetscInt,PetscInt,Mat*);
1260 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1264 
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Switch between CPU-bound and GPU-enabled execution: rebind the local
     block and re-compose the column-view methods to the matching variants. */
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* if the cached column vector is not a CUDA vector, discard the cached
       views so the CUDA getters below recreate them lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* bound to the CPU: install the host implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1305 
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  /* Allocate (or adopt) device storage for the local block of an
     MPIDENSECUDA matrix; silently a no-op for any other matrix type. */
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  /* layouts must be final before the local block can be sized */
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    /* the local block holds all N global columns of the owned rows */
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  /* d_data may be a user-supplied device buffer; presumably NULL means
     "allocate internally" -- confirm in MatSeqDenseCUDASetPreallocation() */
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1328 #endif
1329 
1330 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1331 {
1332   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  /* A dense matrix stores every entry, so no diagonal entry can be absent;
     d (the index of a missing entry) is deliberately left untouched. */
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1346 
1347 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1348 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1349 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1350 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1351 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1352 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1353 
1354 /* -------------------------------------------------------------------*/
1355 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1356                                         MatGetRow_MPIDense,
1357                                         MatRestoreRow_MPIDense,
1358                                         MatMult_MPIDense,
1359                                 /*  4*/ MatMultAdd_MPIDense,
1360                                         MatMultTranspose_MPIDense,
1361                                         MatMultTransposeAdd_MPIDense,
1362                                         0,
1363                                         0,
1364                                         0,
1365                                 /* 10*/ 0,
1366                                         0,
1367                                         0,
1368                                         0,
1369                                         MatTranspose_MPIDense,
1370                                 /* 15*/ MatGetInfo_MPIDense,
1371                                         MatEqual_MPIDense,
1372                                         MatGetDiagonal_MPIDense,
1373                                         MatDiagonalScale_MPIDense,
1374                                         MatNorm_MPIDense,
1375                                 /* 20*/ MatAssemblyBegin_MPIDense,
1376                                         MatAssemblyEnd_MPIDense,
1377                                         MatSetOption_MPIDense,
1378                                         MatZeroEntries_MPIDense,
1379                                 /* 24*/ MatZeroRows_MPIDense,
1380                                         0,
1381                                         0,
1382                                         0,
1383                                         0,
1384                                 /* 29*/ MatSetUp_MPIDense,
1385                                         0,
1386                                         0,
1387                                         MatGetDiagonalBlock_MPIDense,
1388                                         0,
1389                                 /* 34*/ MatDuplicate_MPIDense,
1390                                         0,
1391                                         0,
1392                                         0,
1393                                         0,
1394                                 /* 39*/ MatAXPY_MPIDense,
1395                                         MatCreateSubMatrices_MPIDense,
1396                                         0,
1397                                         MatGetValues_MPIDense,
1398                                         0,
1399                                 /* 44*/ 0,
1400                                         MatScale_MPIDense,
1401                                         MatShift_Basic,
1402                                         0,
1403                                         0,
1404                                 /* 49*/ MatSetRandom_MPIDense,
1405                                         0,
1406                                         0,
1407                                         0,
1408                                         0,
1409                                 /* 54*/ 0,
1410                                         0,
1411                                         0,
1412                                         0,
1413                                         0,
1414                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1415                                         MatDestroy_MPIDense,
1416                                         MatView_MPIDense,
1417                                         0,
1418                                         0,
1419                                 /* 64*/ 0,
1420                                         0,
1421                                         0,
1422                                         0,
1423                                         0,
1424                                 /* 69*/ 0,
1425                                         0,
1426                                         0,
1427                                         0,
1428                                         0,
1429                                 /* 74*/ 0,
1430                                         0,
1431                                         0,
1432                                         0,
1433                                         0,
1434                                 /* 79*/ 0,
1435                                         0,
1436                                         0,
1437                                         0,
1438                                 /* 83*/ MatLoad_MPIDense,
1439                                         0,
1440                                         0,
1441                                         0,
1442                                         0,
1443                                         0,
1444                                 /* 89*/ 0,
1445                                         0,
1446                                         0,
1447                                         0,
1448                                         0,
1449                                 /* 94*/ 0,
1450                                         0,
1451                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1452                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1453                                         0,
1454                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1455                                         0,
1456                                         0,
1457                                         MatConjugate_MPIDense,
1458                                         0,
1459                                 /*104*/ 0,
1460                                         MatRealPart_MPIDense,
1461                                         MatImaginaryPart_MPIDense,
1462                                         0,
1463                                         0,
1464                                 /*109*/ 0,
1465                                         0,
1466                                         0,
1467                                         MatGetColumnVector_MPIDense,
1468                                         MatMissingDiagonal_MPIDense,
1469                                 /*114*/ 0,
1470                                         0,
1471                                         0,
1472                                         0,
1473                                         0,
1474                                 /*119*/ 0,
1475                                         0,
1476                                         0,
1477                                         0,
1478                                         0,
1479                                 /*124*/ 0,
1480                                         MatGetColumnNorms_MPIDense,
1481                                         0,
1482                                         0,
1483                                         0,
1484                                 /*129*/ 0,
1485                                         0,
1486                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1487                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1488                                         0,
1489                                 /*134*/ 0,
1490                                         0,
1491                                         0,
1492                                         0,
1493                                         0,
1494                                 /*139*/ 0,
1495                                         0,
1496                                         0,
1497                                         0,
1498                                         0,
1499                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1500                                 /*145*/ 0,
1501                                         0,
1502                                         0
1503 };
1504 
1505 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1506 {
1507   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1508   PetscBool      iscuda;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1513   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1514   if (!a->A) {
1515     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1516     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1517     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1518   }
1519   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1520   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1521   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1522   mat->preallocated = PETSC_TRUE;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #if defined(PETSC_HAVE_ELEMENTAL)
1527 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1528 {
1529   Mat            mat_elemental;
1530   PetscErrorCode ierr;
1531   PetscScalar    *v;
1532   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1533 
1534   PetscFunctionBegin;
1535   if (reuse == MAT_REUSE_MATRIX) {
1536     mat_elemental = *newmat;
1537     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1538   } else {
1539     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1540     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1541     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1542     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1543     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1544   }
1545 
1546   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1547   for (i=0; i<N; i++) cols[i] = i;
1548   for (i=0; i<m; i++) rows[i] = rstart + i;
1549 
1550   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1551   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1552   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1553   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1555   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1556   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1557 
1558   if (reuse == MAT_INPLACE_MATRIX) {
1559     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1560   } else {
1561     *newmat = mat_elemental;
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 #endif
1566 
1567 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1568 {
1569   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1570   PetscErrorCode ierr;
1571 
1572   PetscFunctionBegin;
1573   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1578 {
1579   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1580   PetscErrorCode ierr;
1581 
1582   PetscFunctionBegin;
1583   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1588 {
1589   PetscErrorCode ierr;
1590   Mat_MPIDense   *mat;
1591   PetscInt       m,nloc,N;
1592 
1593   PetscFunctionBegin;
1594   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1595   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1596   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1597     PetscInt sum;
1598 
1599     if (n == PETSC_DECIDE) {
1600       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1601     }
1602     /* Check sum(n) = N */
1603     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1604     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1605 
1606     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1607   }
1608 
1609   /* numeric phase */
1610   mat = (Mat_MPIDense*)(*outmat)->data;
1611   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1612   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1613   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #if defined(PETSC_HAVE_CUDA)
/*
   Converts a MATMPIDENSECUDA matrix to MATMPIDENSE (host memory).
   Strips all CUDA-specific composed methods, converts the inner sequential
   matrix, and resets the default vector type to the host implementation.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: nothing copied; *newmat presumably aliases M (in-place conversion) */

  B    = *newmat;
  /* pull data back to the CPU and restore host ops before retyping */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-specific methods installed by MatConvert_MPIDense_MPIDenseCUDA() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix too and rebuild the multiply machinery */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1659 
/*
   Converts a MATMPIDENSE matrix to MATMPIDENSECUDA.
   Installs the CUDA-specific composed methods, converts the inner sequential
   matrix to MATSEQDENSECUDA, and switches the default vector type to VECCUDA.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: nothing copied; *newmat presumably aliases M (in-place conversion) */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install CUDA-specific methods; removed again by MatConvert_MPIDenseCUDA_MPIDense() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential matrix too and rebuild the multiply machinery */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1704 #endif
1705 
1706 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1707 {
1708   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1709   PetscErrorCode ierr;
1710   PetscInt       lda;
1711 
1712   PetscFunctionBegin;
1713   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1714   if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1715   if (!a->cvec) {
1716     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1717   }
1718   a->vecinuse = col + 1;
1719   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1720   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1721   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1722   *v   = a->cvec;
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1727 {
1728   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1729   PetscErrorCode ierr;
1730 
1731   PetscFunctionBegin;
1732   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1733   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1734   a->vecinuse = 0;
1735   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1736   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1737   *v   = NULL;
1738   PetscFunctionReturn(0);
1739 }
1740 
/*
   Wraps column col of the local dense block in a read-only Vec (no copy).
   The returned vector is read-locked; release it with MatDenseRestoreColumnVecRead().
   Only one column vector or submatrix may be checked out at a time.
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is attached below with VecPlaceArray() */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* nonzero flags an outstanding column vector */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* forbid writes through the vector while read access is outstanding */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1761 
1762 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1763 {
1764   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1769   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1770   a->vecinuse = 0;
1771   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1772   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1773   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1774   *v   = NULL;
1775   PetscFunctionReturn(0);
1776 }
1777 
/*
   Wraps column col of the local dense block in a write-only Vec (no copy);
   existing values need not be preserved by the caller. Release with
   MatDenseRestoreColumnVecWrite(). Only one column vector or submatrix may be
   checked out at a time.
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is attached below with VecPlaceArray() */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* nonzero flags an outstanding column vector */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1797 
1798 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1799 {
1800   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1805   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1806   a->vecinuse = 0;
1807   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1808   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1809   *v   = NULL;
1810   PetscFunctionReturn(0);
1811 }
1812 
/*
   Exposes columns [cbegin,cend) of A as a dense matrix sharing A's storage (no copy).
   The wrapper matrix a->cmat is cached and reused while the requested width matches;
   release it with MatDenseRestoreSubMatrix().
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (a->cmat && cend-cbegin != a->cmat->cmap->N) {
    /* cached wrapper has the wrong number of columns; rebuild it below */
    ierr = MatDestroy(&a->cmat);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  if (!a->cmat) {
    /* wrap the storage starting at column cbegin, keeping A's leading dimension */
    ierr = MatCreateDense(PetscObjectComm((PetscObject)A),A->rmap->n,PETSC_DECIDE,A->rmap->N,cend-cbegin,(PetscScalar*)a->ptrinuse + (size_t)cbegin * (size_t)lda,&a->cmat);CHKERRQ(ierr);
    ierr = MatDenseSetLDA(a->cmat,lda);CHKERRQ(ierr);
  } else {
    /* reuse the cached wrapper; just point it at the requested columns */
    ierr = MatDensePlaceArray(a->cmat,a->ptrinuse + (size_t)cbegin * (size_t)lda);CHKERRQ(ierr);
  }
  a->matinuse = cbegin + 1;  /* nonzero flags an outstanding submatrix */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1837 
1838 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1839 {
1840   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1841   PetscErrorCode ierr;
1842 
1843   PetscFunctionBegin;
1844   if (!a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1845   if (!a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal matrix");
1846   a->matinuse = 0;
1847   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1848   ierr = MatDenseResetArray(a->cmat);CHKERRQ(ierr);
1849   *v   = NULL;
1850   PetscFunctionReturn(0);
1851 }
1852 
/*
   Constructor for MATMPIDENSE: allocates the type-specific data, installs the
   function table, and composes all type-specific methods. Preallocation of the
   inner sequential matrix happens later via MatMPIDenseSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* dense-specific accessors dispatched through PetscUseMethod/PetscTryMethod */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1915 
1916 /*MC
1917    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1918 
1919    Options Database Keys:
1920 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1921 
1922   Level: beginner
1923 
.seealso: MATDENSE, MATMPIDENSE, MATSEQDENSECUDA
1925 
1926 M*/
1927 #if defined(PETSC_HAVE_CUDA)
/*
   Constructor for MATMPIDENSECUDA: builds a plain MATMPIDENSE and then
   converts it in place to the CUDA variant.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1937 #endif
1938 
1939 /*MC
1940    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1941 
1942    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1943    and MATMPIDENSE otherwise.
1944 
1945    Options Database Keys:
1946 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1947 
1948   Level: beginner
1949 
1950 
1951 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1952 M*/
1953 
1954 /*MC
1955    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1956 
1957    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1958    and MATMPIDENSECUDA otherwise.
1959 
1960    Options Database Keys:
1961 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1962 
1963   Level: beginner
1964 
1965 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1966 M*/
1967 
1968 /*@C
1969    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1970 
1971    Not collective
1972 
1973    Input Parameters:
+  B - the matrix
1975 -  data - optional location of matrix data.  Set data=NULL for PETSc
1976    to control all matrix memory allocation.
1977 
1978    Notes:
1979    The dense format is fully compatible with standard Fortran 77
1980    storage by columns.
1981 
1982    The data input variable is intended primarily for Fortran programmers
1983    who wish to allocate their own matrix memory space.  Most users should
1984    set data=NULL.
1985 
1986    Level: intermediate
1987 
1988 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1989 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation if composed; silently a no-op otherwise */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1999 
2000 /*@
2001    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2002    array provided by the user. This is useful to avoid copying an array
2003    into a matrix
2004 
2005    Not Collective
2006 
2007    Input Parameters:
2008 +  mat - the matrix
2009 -  array - the array in column major order
2010 
2011    Notes:
2012    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2013    freed when the matrix is destroyed.
2014 
2015    Level: developer
2016 
2017 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2018 
2019 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values (may) have changed: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-provided array is host memory, so the valid copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2033 
2034 /*@
   MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()
2036 
2037    Not Collective
2038 
2039    Input Parameters:
2040 .  mat - the matrix
2041 
2042    Notes:
2043    You can only call this after a call to MatDensePlaceArray()
2044 
2045    Level: developer
2046 
2047 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2048 
2049 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  /* the matrix again exposes its original storage: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2060 
2061 /*@
2062    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2063    array provided by the user. This is useful to avoid copying an array
2064    into a matrix
2065 
2066    Not Collective
2067 
2068    Input Parameters:
2069 +  mat - the matrix
2070 -  array - the array in column major order
2071 
2072    Notes:
2073    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2074    freed by the user. It will be freed when the matrix is destroyed.
2075 
2076    Level: developer
2077 
2078 .seealso: MatDenseGetArray(), VecReplaceArray()
2079 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  /* the matrix values (may) have changed: bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array is host memory, so the valid copy is now on the CPU */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2093 
2094 #if defined(PETSC_HAVE_CUDA)
2095 /*@C
2096    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2097    array provided by the user. This is useful to avoid copying an array
2098    into a matrix
2099 
2100    Not Collective
2101 
2102    Input Parameters:
2103 +  mat - the matrix
2104 -  array - the array in column major order
2105 
2106    Notes:
2107    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2108    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2109 
2110    Level: developer
2111 
2112 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2113 @*/
2114 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2115 {
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2120   ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2121   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2122   mat->offloadmask = PETSC_OFFLOAD_GPU;
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 /*@C
2127    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2128 
2129    Not Collective
2130 
2131    Input Parameters:
2132 .  mat - the matrix
2133 
2134    Notes:
2135    You can only call this after a call to MatDenseCUDAPlaceArray()
2136 
2137    Level: developer
2138 
2139 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2140 
2141 @*/
2142 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2143 {
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2148   ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
2149   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /*@C
2154    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2155    array provided by the user. This is useful to avoid copying an array
2156    into a matrix
2157 
2158    Not Collective
2159 
2160    Input Parameters:
2161 +  mat - the matrix
2162 -  array - the array in column major order
2163 
2164    Notes:
2165    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2166    The memory passed in CANNOT be freed by the user. It will be freed
2167    when the matrix is destroyed. The array should respect the matrix leading dimension.
2168 
2169    Level: developer
2170 
2171 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2172 @*/
2173 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2174 {
2175   PetscErrorCode ierr;
2176 
2177   PetscFunctionBegin;
2178   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2179   ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
2180   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
2181   mat->offloadmask = PETSC_OFFLOAD_GPU;
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 /*@C
2186    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2187 
2188    Not Collective
2189 
2190    Input Parameters:
2191 .  A - the matrix
2192 
2193    Output Parameters
2194 .  array - the GPU array in column major order
2195 
2196    Notes:
2197    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2198 
2199    Level: developer
2200 
2201 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2202 @*/
2203 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2204 {
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2209   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2210   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 /*@C
2215    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2216 
2217    Not Collective
2218 
2219    Input Parameters:
2220 +  A - the matrix
2221 -  array - the GPU array in column major order
2222 
2223    Notes:
2224 
2225    Level: developer
2226 
2227 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2228 @*/
2229 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2230 {
2231   PetscErrorCode ierr;
2232 
2233   PetscFunctionBegin;
2234   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2235   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2236   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2237   A->offloadmask = PETSC_OFFLOAD_GPU;
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 /*@C
2242    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2243 
2244    Not Collective
2245 
2246    Input Parameters:
2247 .  A - the matrix
2248 
2249    Output Parameters
2250 .  array - the GPU array in column major order
2251 
2252    Notes:
2253    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2254 
2255    Level: developer
2256 
2257 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2258 @*/
2259 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2265   ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 /*@C
2270    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2271 
2272    Not Collective
2273 
2274    Input Parameters:
2275 +  A - the matrix
2276 -  array - the GPU array in column major order
2277 
2278    Notes:
2279    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2280 
2281    Level: developer
2282 
2283 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2284 @*/
2285 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2286 {
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 /*@C
2295    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2296 
2297    Not Collective
2298 
2299    Input Parameters:
2300 .  A - the matrix
2301 
2302    Output Parameters
2303 .  array - the GPU array in column major order
2304 
2305    Notes:
2306    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2307 
2308    Level: developer
2309 
2310 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2311 @*/
2312 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2313 {
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2318   ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2319   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 /*@C
2324    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2325 
2326    Not Collective
2327 
2328    Input Parameters:
2329 +  A - the matrix
2330 -  array - the GPU array in column major order
2331 
2332    Notes:
2333 
2334    Level: developer
2335 
2336 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2337 @*/
2338 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2339 {
2340   PetscErrorCode ierr;
2341 
2342   PetscFunctionBegin;
2343   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2344   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
2345   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
2346   A->offloadmask = PETSC_OFFLOAD_GPU;
2347   PetscFunctionReturn(0);
2348 }
2349 #endif
2350 
2351 /*@C
2352    MatCreateDense - Creates a matrix in dense format.
2353 
2354    Collective
2355 
2356    Input Parameters:
2357 +  comm - MPI communicator
2358 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2359 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2360 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2361 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2362 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2363    to control all matrix memory allocation.
2364 
2365    Output Parameter:
2366 .  A - the matrix
2367 
2368    Notes:
2369    The dense format is fully compatible with standard Fortran 77
2370    storage by columns.
2371 
2372    The data input variable is intended primarily for Fortran programmers
2373    who wish to allocate their own matrix memory space.  Most users should
2374    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2375 
2376    The user MUST specify either the local or global matrix dimensions
2377    (possibly both).
2378 
2379    Level: intermediate
2380 
2381 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2382 @*/
2383 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2384 {
2385   PetscErrorCode ierr;
2386   PetscMPIInt    size;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2390   PetscValidLogicalCollectiveBool(*A,!!data,6);
2391   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2392   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2393   if (size > 1) {
2394     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2395     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2396     if (data) {  /* user provided data array, so no need to assemble */
2397       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2398       (*A)->assembled = PETSC_TRUE;
2399     }
2400   } else {
2401     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2402     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2403   }
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 #if defined(PETSC_HAVE_CUDA)
2408 /*@C
2409    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2410 
2411    Collective
2412 
2413    Input Parameters:
2414 +  comm - MPI communicator
2415 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2416 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2417 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2418 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2419 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2420    to control matrix memory allocation.
2421 
2422    Output Parameter:
2423 .  A - the matrix
2424 
2425    Notes:
2426 
2427    Level: intermediate
2428 
2429 .seealso: MatCreate(), MatCreateDense()
2430 @*/
2431 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2432 {
2433   PetscErrorCode ierr;
2434   PetscMPIInt    size;
2435 
2436   PetscFunctionBegin;
2437   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2438   PetscValidLogicalCollectiveBool(*A,!!data,6);
2439   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2440   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2441   if (size > 1) {
2442     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2443     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2444     if (data) {  /* user provided data array, so no need to assemble */
2445       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2446       (*A)->assembled = PETSC_TRUE;
2447     }
2448   } else {
2449     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2450     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 #endif
2455 
2456 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2457 {
2458   Mat            mat;
2459   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2460   PetscErrorCode ierr;
2461 
2462   PetscFunctionBegin;
2463   *newmat = 0;
2464   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2465   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2466   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2467   a       = (Mat_MPIDense*)mat->data;
2468 
2469   mat->factortype   = A->factortype;
2470   mat->assembled    = PETSC_TRUE;
2471   mat->preallocated = PETSC_TRUE;
2472 
2473   a->size         = oldmat->size;
2474   a->rank         = oldmat->rank;
2475   mat->insertmode = NOT_SET_VALUES;
2476   a->donotstash   = oldmat->donotstash;
2477 
2478   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2479   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2480 
2481   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2482   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2483   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2484 
2485   *newmat = mat;
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2490 {
2491   PetscErrorCode ierr;
2492   PetscBool      isbinary;
2493 #if defined(PETSC_HAVE_HDF5)
2494   PetscBool      ishdf5;
2495 #endif
2496 
2497   PetscFunctionBegin;
2498   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2499   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2500   /* force binary viewer to load .info file if it has not yet done so */
2501   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2502   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2503 #if defined(PETSC_HAVE_HDF5)
2504   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2505 #endif
2506   if (isbinary) {
2507     ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
2508 #if defined(PETSC_HAVE_HDF5)
2509   } else if (ishdf5) {
2510     ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
2511 #endif
2512   } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2517 {
2518   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2519   Mat            a,b;
2520   PetscBool      flg;
2521   PetscErrorCode ierr;
2522 
2523   PetscFunctionBegin;
2524   a    = matA->A;
2525   b    = matB->A;
2526   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2527   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2532 {
2533   PetscErrorCode        ierr;
2534   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2535 
2536   PetscFunctionBegin;
2537   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2538   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2539   ierr = PetscFree(atb);CHKERRQ(ierr);
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2544 {
2545   PetscErrorCode        ierr;
2546   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2547 
2548   PetscFunctionBegin;
2549   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2550   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2551   ierr = PetscFree(abt);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank computes the full-sized local product aseq^T * bseq; the global
   result is obtained by summing these partial products across ranks with
   MPI_Reduce_scatter, which simultaneously distributes the rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* sendbuf/recvcounts were sized in the symbolic phase */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* each rank i receives its cN columns worth of the rows it owns in C */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: destination-rank blocks, each in column-major order */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2598 
2599 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2600 {
2601   PetscErrorCode        ierr;
2602   MPI_Comm              comm;
2603   PetscMPIInt           size;
2604   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2605   Mat_TransMatMultDense *atb;
2606   PetscBool             cisdense;
2607 
2608   PetscFunctionBegin;
2609   MatCheckProduct(C,3);
2610   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2611   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2612   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2613     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2614   }
2615 
2616   /* create matrix product C */
2617   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2618   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2619   if (!cisdense) {
2620     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2621   }
2622   ierr = MatSetUp(C);CHKERRQ(ierr);
2623   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2624   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2625 
2626   /* create data structure for reuse C */
2627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2628   ierr = PetscNew(&atb);CHKERRQ(ierr);
2629   cM   = C->rmap->N;
2630   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2631 
2632   C->product->data    = atb;
2633   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2634   PetscFunctionReturn(0);
2635 }
2636 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sets up C and
   allocates the reuse context for whichever algorithm was requested
   ("allgatherv", the default, or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;          /* 0 = allgatherv, 1 = cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: A*B^T needs matching column layouts */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* private tag for the cyclic point-to-point messages */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers, each large enough for the biggest rank's slab of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2691 
/* Numeric phase of C = A * B^T, cyclic algorithm: each rank's slab of B^T is
   rotated around the ranks; at every step the local GEMM against the current
   slab overlaps with the nonblocking send/receive of the next one. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the local blocks, as BLAS ints */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if B's local block is contiguous (lda == #rows) it can be sent in place;
     otherwise compact it column by column into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;  /* the slab currently in sendbuf originated here */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];  /* alternate buffers each round */
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2775 
2776 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2777 {
2778   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2779   Mat_MatTransMultDense *abt;
2780   PetscErrorCode        ierr;
2781   MPI_Comm              comm;
2782   PetscMPIInt           size;
2783   PetscScalar           *cv, *sendbuf, *recvbuf;
2784   const PetscScalar     *av,*bv;
2785   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2786   PetscScalar           _DOne=1.0,_DZero=0.0;
2787   PetscBLASInt          cm, cn, ck, alda, clda;
2788 
2789   PetscFunctionBegin;
2790   MatCheckProduct(C,3);
2791   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2792   abt  = (Mat_MatTransMultDense*)C->product->data;
2793   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2794   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2795   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2796   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2797   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2798   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2799   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2800   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2801   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2802   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2803   /* copy transpose of B into buf[0] */
2804   bn      = B->rmap->n;
2805   sendbuf = abt->buf[0];
2806   recvbuf = abt->buf[1];
2807   for (k = 0, j = 0; j < bn; j++) {
2808     for (i = 0; i < cK; i++, k++) {
2809       sendbuf[k] = bv[i * blda + j];
2810     }
2811   }
2812   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2813   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2814   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2815   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2816   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2817   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2818   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2819   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2820   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2821   PetscFunctionReturn(0);
2822 }
2823 
2824 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2825 {
2826   Mat_MatTransMultDense *abt;
2827   PetscErrorCode        ierr;
2828 
2829   PetscFunctionBegin;
2830   MatCheckProduct(C,3);
2831   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2832   abt = (Mat_MatTransMultDense*)C->product->data;
2833   switch (abt->alg) {
2834   case 1:
2835     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2836     break;
2837   default:
2838     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2839     break;
2840   }
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2845 {
2846   PetscErrorCode   ierr;
2847   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2851   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2852   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2853   ierr = PetscFree(ab);CHKERRQ(ierr);
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B via Elemental: refresh the Elemental copies of A
   and B, multiply there, and convert the result back into the dense matrix C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* Ae/Be/Ce were created in the symbolic phase and are reused here */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  /* copy the Elemental result back into C in place */
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2873 
2874 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2875 {
2876   PetscErrorCode   ierr;
2877   Mat              Ae,Be,Ce;
2878   Mat_MatMultDense *ab;
2879 
2880   PetscFunctionBegin;
2881   MatCheckProduct(C,4);
2882   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2883   /* check local size of A and B */
2884   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2885     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2886   }
2887 
2888   /* create elemental matrices Ae and Be */
2889   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2890   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2891   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2892   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2893   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2894 
2895   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2896   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2897   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2898   ierr = MatSetUp(Be);CHKERRQ(ierr);
2899   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2900 
2901   /* compute symbolic Ce = Ae*Be */
2902   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2903   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2904 
2905   /* setup C */
2906   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2907   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2908   ierr = MatSetUp(C);CHKERRQ(ierr);
2909 
2910   /* create data structure for reuse Cdense */
2911   ierr = PetscNew(&ab);CHKERRQ(ierr);
2912   ab->Ae = Ae;
2913   ab->Be = Be;
2914   ab->Ce = Ce;
2915 
2916   C->product->data    = ab;
2917   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2918   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2919   PetscFunctionReturn(0);
2920 }
2921 #endif
2922 /* ----------------------------------------------- */
2923 #if defined(PETSC_HAVE_ELEMENTAL)
2924 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2925 {
2926   PetscFunctionBegin;
2927   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2928   C->ops->productsymbolic = MatProductSymbolic_AB;
2929   PetscFunctionReturn(0);
2930 }
2931 #endif
2932 
2933 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2934 {
2935   Mat_Product *product = C->product;
2936   Mat         A = product->A,B=product->B;
2937 
2938   PetscFunctionBegin;
2939   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2940     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2941   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2942   C->ops->productsymbolic = MatProductSymbolic_AtB;
2943   PetscFunctionReturn(0);
2944 }
2945 
/*
   Install the C = A*B^T (ABt) product kernels on C, selecting between the
   "allgatherv" (alg 0, default) and "cyclic" (alg 1) communication
   strategies.  The choice can be overridden at runtime via
   -matmattransmult_mpidense_mpidense_via (MatMatTransposeMult API) or
   -matproduct_abt_mpidense_mpidense_via (MatProduct API); the numeric
   phase dispatches on the stored algorithm index.
*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm: index 0 = "allgatherv" */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  /* flg is set by PetscOptionsEList only when the user supplied the option */
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2980 
2981 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2982 {
2983   PetscErrorCode ierr;
2984   Mat_Product    *product = C->product;
2985 
2986   PetscFunctionBegin;
2987   switch (product->type) {
2988 #if defined(PETSC_HAVE_ELEMENTAL)
2989   case MATPRODUCT_AB:
2990     ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
2991     break;
2992 #endif
2993   case MATPRODUCT_AtB:
2994     ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
2995     break;
2996   case MATPRODUCT_ABt:
2997     ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
2998     break;
2999   default:
3000     break;
3001   }
3002   PetscFunctionReturn(0);
3003 }
3004