xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix returns the sequential matrix that
13               represents the local portion of the operator; for a MATSEQDENSE matrix it returns the matrix itself.
14 
15     Input Parameter:
16 .      A - the MATMPIDENSE or MATSEQDENSE matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     CHKERRQ(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
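
/*
   A minimal usage sketch for MatDenseGetLocalMatrix() (illustrative; assumes
   A is an assembled MATMPIDENSE matrix, error checking omitted):

     Mat         A,Aloc;
     PetscScalar *array;

     MatDenseGetLocalMatrix(A,&Aloc);     // Aloc is the per-process MATSEQDENSE block
     MatDenseGetArray(Aloc,&array);       // column-major storage of the local rows
     // ... read or modify the local entries ...
     MatDenseRestoreArray(Aloc,&array);   // no MatDestroy(&Aloc): A owns the inner matrix
*/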
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   CHKERRQ(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
56 
57   PetscFunctionBegin;
58   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
59   lrow = row - rstart;
60   CHKERRQ(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
61   PetscFunctionReturn(0);
62 }
63 
64 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
65 {
66   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
67   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
68 
69   PetscFunctionBegin;
70   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
71   lrow = row - rstart;
72   CHKERRQ(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
73   PetscFunctionReturn(0);
74 }
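
/*
   A minimal usage sketch: MatGetRow()/MatRestoreRow() on a MATMPIDENSE matrix
   only serve locally owned rows, so callers should stay within the ownership
   range (illustrative; assumes A is assembled, error checking omitted):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     MatGetOwnershipRange(A,&rstart,&rend);
     for (row=rstart; row<rend; row++) {
       MatGetRow(A,row,&ncols,&cols,&vals);
       // ... inspect the ncols entries of this row ...
       MatRestoreRow(A,row,&ncols,&cols,&vals);
     }
*/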
75 
76 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
80   PetscScalar    *array;
81   MPI_Comm       comm;
82   PetscBool      flg;
83   Mat            B;
84 
85   PetscFunctionBegin;
86   CHKERRQ(MatHasCongruentLayouts(A,&flg));
87   PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices with congruent row and column layouts are supported.");
88   CHKERRQ(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
89   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
90 
91     CHKERRQ(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
92     PetscCheckFalse(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
93     CHKERRQ(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
94     CHKERRQ(MatCreate(comm,&B));
95     CHKERRQ(MatSetSizes(B,m,m,m,m));
96     CHKERRQ(MatSetType(B,((PetscObject)mdn->A)->type_name));
97     CHKERRQ(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
98     CHKERRQ(MatSeqDenseSetPreallocation(B,array+m*rstart));
99     CHKERRQ(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
100     CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
101     CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
102     CHKERRQ(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
103     *a   = B;
104     CHKERRQ(MatDestroy(&B));
105   } else *a = B;
106   PetscFunctionReturn(0);
107 }
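
/*
   A minimal usage sketch: the diagonal block is the square sequential matrix
   formed by the locally owned rows and the matching columns; it is cached on
   A, so the caller must not destroy it (illustrative only):

     Mat B;

     MatGetDiagonalBlock(A,&B);   // B is a MATSEQDENSE matrix with A->rmap->n rows
     // ... use B, e.g. as the local block of a block-Jacobi preconditioner ...
*/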
108 
109 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
110 {
111   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
112   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
113   PetscBool      roworiented = A->roworiented;
114 
115   PetscFunctionBegin;
116   for (i=0; i<m; i++) {
117     if (idxm[i] < 0) continue;
118     PetscCheckFalse(idxm[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
119     if (idxm[i] >= rstart && idxm[i] < rend) {
120       row = idxm[i] - rstart;
121       if (roworiented) {
122         CHKERRQ(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
123       } else {
124         for (j=0; j<n; j++) {
125           if (idxn[j] < 0) continue;
126           PetscCheckFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
127           CHKERRQ(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
128         }
129       }
130     } else if (!A->donotstash) {
131       mat->assembled = PETSC_FALSE;
132       if (roworiented) {
133         CHKERRQ(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
134       } else {
135         CHKERRQ(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
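
/*
   A minimal usage sketch: entries destined for rows owned by other processes
   are legal in MatSetValues() because they are routed through the stash and
   only communicated during assembly (illustrative; error checking omitted):

     PetscInt    i = 0,j = 0;   // any valid global indices, including off-process rows
     PetscScalar v = 1.0;

     MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);   // ships stashed off-process entries
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/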
141 
142 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
143 {
144   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
145   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
146 
147   PetscFunctionBegin;
148   for (i=0; i<m; i++) {
149     if (idxm[i] < 0) continue; /* negative row */
150     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
151     if (idxm[i] >= rstart && idxm[i] < rend) {
152       row = idxm[i] - rstart;
153       for (j=0; j<n; j++) {
154         if (idxn[j] < 0) continue; /* negative column */
155         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
156         CHKERRQ(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
157       }
158     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
159   }
160   PetscFunctionReturn(0);
161 }
162 
163 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
164 {
165   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
166 
167   PetscFunctionBegin;
168   CHKERRQ(MatDenseGetLDA(a->A,lda));
169   PetscFunctionReturn(0);
170 }
171 
172 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscBool      iscuda;
176 
177   PetscFunctionBegin;
178   if (!a->A) {
179     PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
180     CHKERRQ(PetscLayoutSetUp(A->rmap));
181     CHKERRQ(PetscLayoutSetUp(A->cmap));
182     CHKERRQ(MatCreate(PETSC_COMM_SELF,&a->A));
183     CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
184     CHKERRQ(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
185     CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
186     CHKERRQ(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
187   }
188   CHKERRQ(MatDenseSetLDA(a->A,lda));
189   PetscFunctionReturn(0);
190 }
191 
192 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
193 {
194   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
195 
196   PetscFunctionBegin;
197   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
198   CHKERRQ(MatDenseGetArray(a->A,array));
199   PetscFunctionReturn(0);
200 }
201 
202 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
203 {
204   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
205 
206   PetscFunctionBegin;
207   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   CHKERRQ(MatDenseGetArrayRead(a->A,array));
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215 
216   PetscFunctionBegin;
217   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
218   CHKERRQ(MatDenseGetArrayWrite(a->A,array));
219   PetscFunctionReturn(0);
220 }
221 
222 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
223 {
224   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
225 
226   PetscFunctionBegin;
227   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
228   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
229   CHKERRQ(MatDensePlaceArray(a->A,array));
230   PetscFunctionReturn(0);
231 }
232 
233 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
234 {
235   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
236 
237   PetscFunctionBegin;
238   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
239   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
240   CHKERRQ(MatDenseResetArray(a->A));
241   PetscFunctionReturn(0);
242 }
243 
244 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
245 {
246   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
247 
248   PetscFunctionBegin;
249   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
250   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
251   CHKERRQ(MatDenseReplaceArray(a->A,array));
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
256 {
257   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
258   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
259   const PetscInt    *irow,*icol;
260   const PetscScalar *v;
261   PetscScalar       *bv;
262   Mat               newmat;
263   IS                iscol_local;
264   MPI_Comm          comm_is,comm_mat;
265 
266   PetscFunctionBegin;
267   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm_mat));
268   CHKERRQ(PetscObjectGetComm((PetscObject)iscol,&comm_is));
269   PetscCheckFalse(comm_mat != comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
270 
271   CHKERRQ(ISAllGather(iscol,&iscol_local));
272   CHKERRQ(ISGetIndices(isrow,&irow));
273   CHKERRQ(ISGetIndices(iscol_local,&icol));
274   CHKERRQ(ISGetLocalSize(isrow,&nrows));
275   CHKERRQ(ISGetLocalSize(iscol,&ncols));
276   CHKERRQ(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */
277 
278   /* No parallel redistribution is currently supported! We should really check each index
279      set to confirm that it is OK.  ... Currently only a submatrix with the same
280      partitioning as the original matrix is supported! */
281 
282   CHKERRQ(MatGetLocalSize(A,&nlrows,&nlcols));
283   CHKERRQ(MatGetOwnershipRange(A,&rstart,&rend));
284 
285   /* Check submatrix call */
286   if (scall == MAT_REUSE_MATRIX) {
287     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
288     /* Really need to test row and column sizes! */
289     newmat = *B;
290   } else {
291     /* Create and fill new matrix */
292     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
293     CHKERRQ(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
294     CHKERRQ(MatSetType(newmat,((PetscObject)A)->type_name));
295     CHKERRQ(MatMPIDenseSetPreallocation(newmat,NULL));
296   }
297 
298   /* Now extract the data pointers and do the copy, column at a time */
299   newmatd = (Mat_MPIDense*)newmat->data;
300   CHKERRQ(MatDenseGetArray(newmatd->A,&bv));
301   CHKERRQ(MatDenseGetArrayRead(mat->A,&v));
302   CHKERRQ(MatDenseGetLDA(mat->A,&lda));
303   for (i=0; i<Ncols; i++) {
304     const PetscScalar *av = v + lda*icol[i];
305     for (j=0; j<nrows; j++) {
306       *bv++ = av[irow[j] - rstart];
307     }
308   }
309   CHKERRQ(MatDenseRestoreArrayRead(mat->A,&v));
310   CHKERRQ(MatDenseRestoreArray(newmatd->A,&bv));
311 
312   /* Assemble the matrices so that the correct flags are set */
313   CHKERRQ(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
314   CHKERRQ(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
315 
316   /* Free work space */
317   CHKERRQ(ISRestoreIndices(isrow,&irow));
318   CHKERRQ(ISRestoreIndices(iscol_local,&icol));
319   CHKERRQ(ISDestroy(&iscol_local));
320   *B   = newmat;
321   PetscFunctionReturn(0);
322 }
323 
324 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
325 {
326   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
327 
328   PetscFunctionBegin;
329   CHKERRQ(MatDenseRestoreArray(a->A,array));
330   PetscFunctionReturn(0);
331 }
332 
333 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
334 {
335   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
336 
337   PetscFunctionBegin;
338   CHKERRQ(MatDenseRestoreArrayRead(a->A,array));
339   PetscFunctionReturn(0);
340 }
341 
342 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
343 {
344   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
345 
346   PetscFunctionBegin;
347   CHKERRQ(MatDenseRestoreArrayWrite(a->A,array));
348   PetscFunctionReturn(0);
349 }
350 
351 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
354   PetscInt       nstash,reallocs;
355 
356   PetscFunctionBegin;
357   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
358 
359   CHKERRQ(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
360   CHKERRQ(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
361   CHKERRQ(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
362   PetscFunctionReturn(0);
363 }
364 
365 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
366 {
367   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
368   PetscInt       i,*row,*col,flg,j,rstart,ncols;
369   PetscMPIInt    n;
370   PetscScalar    *val;
371 
372   PetscFunctionBegin;
373   if (!mdn->donotstash && !mat->nooffprocentries) {
374     /*  wait on receives */
375     while (1) {
376       CHKERRQ(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
377       if (!flg) break;
378 
379       for (i=0; i<n;) {
380         /* Now identify the consecutive vals belonging to the same row */
381         for (j=i,rstart=row[j]; j<n; j++) {
382           if (row[j] != rstart) break;
383         }
384         if (j < n) ncols = j-i;
385         else       ncols = n-i;
386         /* Now assemble all these values with a single function call */
387         CHKERRQ(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
388         i    = j;
389       }
390     }
391     CHKERRQ(MatStashScatterEnd_Private(&mat->stash));
392   }
393 
394   CHKERRQ(MatAssemblyBegin(mdn->A,mode));
395   CHKERRQ(MatAssemblyEnd(mdn->A,mode));
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     CHKERRQ(MatSetUpMultiply_MPIDense(mat));
399   }
400   PetscFunctionReturn(0);
401 }
402 
403 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
404 {
405   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
406 
407   PetscFunctionBegin;
408   CHKERRQ(MatZeroEntries(l->A));
409   PetscFunctionReturn(0);
410 }
411 
412 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
413 {
414   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
415   PetscInt          i,len,*lrows;
416 
417   PetscFunctionBegin;
418   /* get locally owned rows */
419   CHKERRQ(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
420   /* fix right hand side if needed */
421   if (x && b) {
422     const PetscScalar *xx;
423     PetscScalar       *bb;
424 
425     CHKERRQ(VecGetArrayRead(x, &xx));
426     CHKERRQ(VecGetArrayWrite(b, &bb));
427     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
428     CHKERRQ(VecRestoreArrayRead(x, &xx));
429     CHKERRQ(VecRestoreArrayWrite(b, &bb));
430   }
431   CHKERRQ(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
432   if (diag != 0.0) {
433     Vec d;
434 
435     CHKERRQ(MatCreateVecs(A,NULL,&d));
436     CHKERRQ(VecSet(d,diag));
437     CHKERRQ(MatDiagonalSet(A,d,INSERT_VALUES));
438     CHKERRQ(VecDestroy(&d));
439   }
440   CHKERRQ(PetscFree(lrows));
441   PetscFunctionReturn(0);
442 }
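
/*
   A minimal usage sketch: zero a set of global rows, put diag on their
   diagonal entries, and (optionally) adjust the right-hand side so that
   b[row] = diag*x[row] for every zeroed row, exactly as done above
   (illustrative; x and b are vectors conforming to the rows of A, error
   checking omitted):

     PetscInt    rows[] = {0,5};   // global indices; each process passes the rows it wants zeroed
     PetscScalar diag   = 1.0;

     MatZeroRows(A,2,rows,diag,x,b);   // pass NULL,NULL to skip the RHS fix-up
*/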
443 
444 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
445 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
446 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
447 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
448 
449 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
450 {
451   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
452   const PetscScalar *ax;
453   PetscScalar       *ay;
454   PetscMemType      axmtype,aymtype;
455 
456   PetscFunctionBegin;
457   CHKERRQ(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
458   CHKERRQ(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
459   CHKERRQ(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
460   CHKERRQ(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
461   CHKERRQ(VecRestoreArrayAndMemType(mdn->lvec,&ay));
462   CHKERRQ(VecRestoreArrayReadAndMemType(xx,&ax));
463   CHKERRQ((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
464   PetscFunctionReturn(0);
465 }
466 
467 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
468 {
469   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
470   const PetscScalar *ax;
471   PetscScalar       *ay;
472   PetscMemType      axmtype,aymtype;
473 
474   PetscFunctionBegin;
475   CHKERRQ(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
476   CHKERRQ(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
477   CHKERRQ(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
478   CHKERRQ(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
479   CHKERRQ(VecRestoreArrayAndMemType(mdn->lvec,&ay));
480   CHKERRQ(VecRestoreArrayReadAndMemType(xx,&ax));
481   CHKERRQ((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
482   PetscFunctionReturn(0);
483 }
484 
485 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
486 {
487   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
488   const PetscScalar *ax;
489   PetscScalar       *ay;
490   PetscMemType      axmtype,aymtype;
491 
492   PetscFunctionBegin;
493   CHKERRQ(VecSet(yy,0.0));
494   CHKERRQ((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
495   CHKERRQ(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
496   CHKERRQ(VecGetArrayAndMemType(yy,&ay,&aymtype));
497   CHKERRQ(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
498   CHKERRQ(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
499   CHKERRQ(VecRestoreArrayReadAndMemType(a->lvec,&ax));
500   CHKERRQ(VecRestoreArrayAndMemType(yy,&ay));
501   PetscFunctionReturn(0);
502 }
503 
504 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
505 {
506   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
507   const PetscScalar *ax;
508   PetscScalar       *ay;
509   PetscMemType      axmtype,aymtype;
510 
511   PetscFunctionBegin;
512   CHKERRQ(VecCopy(yy,zz));
513   CHKERRQ((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
514   CHKERRQ(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
515   CHKERRQ(VecGetArrayAndMemType(zz,&ay,&aymtype));
516   CHKERRQ(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
517   CHKERRQ(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
518   CHKERRQ(VecRestoreArrayReadAndMemType(a->lvec,&ax));
519   CHKERRQ(VecRestoreArrayAndMemType(zz,&ay));
520   PetscFunctionReturn(0);
521 }
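
/*
   A minimal usage sketch: the products above need vectors whose layouts
   conform to A, which MatCreateVecs() provides (illustrative; error checking
   omitted):

     Vec x,y;

     MatCreateVecs(A,&x,&y);    // x conforms to the columns of A, y to the rows
     VecSet(x,1.0);
     MatMult(A,x,y);            // y = A*x:   broadcast x, then a local GEMV
     MatMultTranspose(A,y,x);   // x = A^T*y: local GEMV, then a sum reduction
     VecDestroy(&x);
     VecDestroy(&y);
*/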
522 
523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
524 {
525   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
526   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
527   PetscScalar       *x,zero = 0.0;
528   const PetscScalar *av;
529 
530   PetscFunctionBegin;
531   CHKERRQ(VecSet(v,zero));
532   CHKERRQ(VecGetArray(v,&x));
533   CHKERRQ(VecGetSize(v,&n));
534   PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
535   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
536   radd = A->rmap->rstart*m;
537   CHKERRQ(MatDenseGetArrayRead(a->A,&av));
538   CHKERRQ(MatDenseGetLDA(a->A,&lda));
539   for (i=0; i<len; i++) {
540     x[i] = av[radd + i*lda + i];
541   }
542   CHKERRQ(MatDenseRestoreArrayRead(a->A,&av));
543   CHKERRQ(VecRestoreArray(v,&x));
544   PetscFunctionReturn(0);
545 }
546 
547 PetscErrorCode MatDestroy_MPIDense(Mat mat)
548 {
549   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
550 
551   PetscFunctionBegin;
552 #if defined(PETSC_USE_LOG)
553   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
554 #endif
555   CHKERRQ(MatStashDestroy_Private(&mat->stash));
556   PetscCheckFalse(mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
557   PetscCheckFalse(mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
558   CHKERRQ(MatDestroy(&mdn->A));
559   CHKERRQ(VecDestroy(&mdn->lvec));
560   CHKERRQ(PetscSFDestroy(&mdn->Mvctx));
561   CHKERRQ(VecDestroy(&mdn->cvec));
562   CHKERRQ(MatDestroy(&mdn->cmat));
563 
564   CHKERRQ(PetscFree(mat->data));
565   CHKERRQ(PetscObjectChangeTypeName((PetscObject)mat,NULL));
566 
567   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
568   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
569   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
570   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
571   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
572   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
573   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
574   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
575   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
576   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
577   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
578   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
579   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
580 #if defined(PETSC_HAVE_ELEMENTAL)
581   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
582 #endif
583 #if defined(PETSC_HAVE_SCALAPACK)
584   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
585 #endif
586   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
587   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
588   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
589 #if defined (PETSC_HAVE_CUDA)
590   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
591   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
592 #endif
593   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
594   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
595   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
596   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
597   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
598   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
599   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
600   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
601   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
602   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
603   PetscFunctionReturn(0);
604 }
605 
606 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
607 
608 #include <petscdraw.h>
609 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
610 {
611   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
612   PetscMPIInt       rank;
613   PetscViewerType   vtype;
614   PetscBool         iascii,isdraw;
615   PetscViewer       sviewer;
616   PetscViewerFormat format;
617 
618   PetscFunctionBegin;
619   CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
620   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
621   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
622   if (iascii) {
623     CHKERRQ(PetscViewerGetType(viewer,&vtype));
624     CHKERRQ(PetscViewerGetFormat(viewer,&format));
625     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
626       MatInfo info;
627       CHKERRQ(MatGetInfo(mat,MAT_LOCAL,&info));
628       CHKERRQ(PetscViewerASCIIPushSynchronized(viewer));
629       CHKERRQ(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
630       CHKERRQ(PetscViewerFlush(viewer));
631       CHKERRQ(PetscViewerASCIIPopSynchronized(viewer));
632       CHKERRQ(PetscSFView(mdn->Mvctx,viewer));
633       PetscFunctionReturn(0);
634     } else if (format == PETSC_VIEWER_ASCII_INFO) {
635       PetscFunctionReturn(0);
636     }
637   } else if (isdraw) {
638     PetscDraw draw;
639     PetscBool isnull;
640 
641     CHKERRQ(PetscViewerDrawGetDraw(viewer,0,&draw));
642     CHKERRQ(PetscDrawIsNull(draw,&isnull));
643     if (isnull) PetscFunctionReturn(0);
644   }
645 
646   {
647     /* Assemble the entire matrix onto the first processor. */
648     Mat         A;
649     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
650     PetscInt    *cols;
651     PetscScalar *vals;
652 
653     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)mat),&A));
654     if (rank == 0) {
655       CHKERRQ(MatSetSizes(A,M,N,M,N));
656     } else {
657       CHKERRQ(MatSetSizes(A,0,0,M,N));
658     }
659     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
660     CHKERRQ(MatSetType(A,MATMPIDENSE));
661     CHKERRQ(MatMPIDenseSetPreallocation(A,NULL));
662     CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
663 
664     /* Copy the matrix ... This isn't the most efficient means,
665        but it's quick for now */
666     A->insertmode = INSERT_VALUES;
667 
668     row = mat->rmap->rstart;
669     m   = mdn->A->rmap->n;
670     for (i=0; i<m; i++) {
671       CHKERRQ(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
672       CHKERRQ(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
673       CHKERRQ(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
674       row++;
675     }
676 
677     CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
678     CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
679     CHKERRQ(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
680     if (rank == 0) {
681       CHKERRQ(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
682       CHKERRQ(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
683     }
684     CHKERRQ(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
685     CHKERRQ(PetscViewerFlush(viewer));
686     CHKERRQ(MatDestroy(&A));
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
692 {
693   PetscBool      iascii,isbinary,isdraw,issocket;
694 
695   PetscFunctionBegin;
696   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
697   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
698   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
699   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
700 
701   if (iascii || issocket || isdraw) {
702     CHKERRQ(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
703   } else if (isbinary) {
704     CHKERRQ(MatView_Dense_Binary(mat,viewer));
705   }
706   PetscFunctionReturn(0);
707 }
708 
709 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
710 {
711   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
712   Mat            mdn  = mat->A;
713   PetscLogDouble isend[5],irecv[5];
714 
715   PetscFunctionBegin;
716   info->block_size = 1.0;
717 
718   CHKERRQ(MatGetInfo(mdn,MAT_LOCAL,info));
719 
720   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
721   isend[3] = info->memory;  isend[4] = info->mallocs;
722   if (flag == MAT_LOCAL) {
723     info->nz_used      = isend[0];
724     info->nz_allocated = isend[1];
725     info->nz_unneeded  = isend[2];
726     info->memory       = isend[3];
727     info->mallocs      = isend[4];
728   } else if (flag == MAT_GLOBAL_MAX) {
729     CHKERRMPI(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
730 
731     info->nz_used      = irecv[0];
732     info->nz_allocated = irecv[1];
733     info->nz_unneeded  = irecv[2];
734     info->memory       = irecv[3];
735     info->mallocs      = irecv[4];
736   } else if (flag == MAT_GLOBAL_SUM) {
737     CHKERRMPI(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
738 
739     info->nz_used      = irecv[0];
740     info->nz_allocated = irecv[1];
741     info->nz_unneeded  = irecv[2];
742     info->memory       = irecv[3];
743     info->mallocs      = irecv[4];
744   }
745   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
746   info->fill_ratio_needed = 0;
747   info->factor_mallocs    = 0;
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
752 {
753   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
754 
755   PetscFunctionBegin;
756   switch (op) {
757   case MAT_NEW_NONZERO_LOCATIONS:
758   case MAT_NEW_NONZERO_LOCATION_ERR:
759   case MAT_NEW_NONZERO_ALLOCATION_ERR:
760     MatCheckPreallocated(A,1);
761     CHKERRQ(MatSetOption(a->A,op,flg));
762     break;
763   case MAT_ROW_ORIENTED:
764     MatCheckPreallocated(A,1);
765     a->roworiented = flg;
766     CHKERRQ(MatSetOption(a->A,op,flg));
767     break;
768   case MAT_FORCE_DIAGONAL_ENTRIES:
769   case MAT_KEEP_NONZERO_PATTERN:
770   case MAT_USE_HASH_TABLE:
771   case MAT_SORTED_FULL:
772     CHKERRQ(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
773     break;
774   case MAT_IGNORE_OFF_PROC_ENTRIES:
775     a->donotstash = flg;
776     break;
777   case MAT_SYMMETRIC:
778   case MAT_STRUCTURALLY_SYMMETRIC:
779   case MAT_HERMITIAN:
780   case MAT_SYMMETRY_ETERNAL:
781   case MAT_IGNORE_LOWER_TRIANGULAR:
782   case MAT_IGNORE_ZERO_ENTRIES:
783     CHKERRQ(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
784     break;
785   default:
786     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
792 {
793   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
794   const PetscScalar *l;
795   PetscScalar       x,*v,*vv,*r;
796   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
797 
798   PetscFunctionBegin;
799   CHKERRQ(MatDenseGetArray(mdn->A,&vv));
800   CHKERRQ(MatDenseGetLDA(mdn->A,&lda));
801   CHKERRQ(MatGetLocalSize(A,&s2,&s3));
802   if (ll) {
803     CHKERRQ(VecGetLocalSize(ll,&s2a));
804     PetscCheckFalse(s2a != s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
805     CHKERRQ(VecGetArrayRead(ll,&l));
806     for (i=0; i<m; i++) {
807       x = l[i];
808       v = vv + i;
809       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
810     }
811     CHKERRQ(VecRestoreArrayRead(ll,&l));
812     CHKERRQ(PetscLogFlops(1.0*n*m));
813   }
814   if (rr) {
815     const PetscScalar *ar;
816 
817     CHKERRQ(VecGetLocalSize(rr,&s3a));
818     PetscCheckFalse(s3a != s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
819     CHKERRQ(VecGetArrayRead(rr,&ar));
820     CHKERRQ(VecGetArray(mdn->lvec,&r));
821     CHKERRQ(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
822     CHKERRQ(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
823     CHKERRQ(VecRestoreArrayRead(rr,&ar));
824     for (i=0; i<n; i++) {
825       x = r[i];
826       v = vv + i*lda;
827       for (j=0; j<m; j++) (*v++) *= x;
828     }
829     CHKERRQ(VecRestoreArray(mdn->lvec,&r));
830     CHKERRQ(PetscLogFlops(1.0*n*m));
831   }
832   CHKERRQ(MatDenseRestoreArray(mdn->A,&vv));
833   PetscFunctionReturn(0);
834 }
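
/*
   A minimal usage sketch: MatDiagonalScale() computes A <- diag(ll)*A*diag(rr);
   either vector may be NULL to skip that side (illustrative; error checking
   omitted):

     Vec l,r;

     MatCreateVecs(A,&r,&l);   // r conforms to the columns of A, l to the rows
     VecSet(l,2.0);
     VecSet(r,0.5);
     MatDiagonalScale(A,l,r);  // scale row i by l[i] and column j by r[j]
     VecDestroy(&l);
     VecDestroy(&r);
*/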
835 
836 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
837 {
838   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
839   PetscInt          i,j;
840   PetscMPIInt       size;
841   PetscReal         sum = 0.0;
842   const PetscScalar *av,*v;
843 
844   PetscFunctionBegin;
845   CHKERRQ(MatDenseGetArrayRead(mdn->A,&av));
846   v    = av;
847   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
848   if (size == 1) {
849     CHKERRQ(MatNorm(mdn->A,type,nrm));
850   } else {
851     if (type == NORM_FROBENIUS) {
852       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
853         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
854       }
855       CHKERRMPI(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
856       *nrm = PetscSqrtReal(*nrm);
857       CHKERRQ(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
858     } else if (type == NORM_1) {
859       PetscReal *tmp,*tmp2;
860       CHKERRQ(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
861       *nrm = 0.0;
862       v    = av;
863       for (j=0; j<mdn->A->cmap->n; j++) {
864         for (i=0; i<mdn->A->rmap->n; i++) {
865           tmp[j] += PetscAbsScalar(*v);  v++;
866         }
867       }
868       CHKERRMPI(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
869       for (j=0; j<A->cmap->N; j++) {
870         if (tmp2[j] > *nrm) *nrm = tmp2[j];
871       }
872       CHKERRQ(PetscFree2(tmp,tmp2));
873       CHKERRQ(PetscLogFlops(A->cmap->n*A->rmap->n));
874     } else if (type == NORM_INFINITY) { /* max row norm */
875       PetscReal ntemp;
876       CHKERRQ(MatNorm(mdn->A,type,&ntemp));
877       CHKERRMPI(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
878     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for the two norm");
879   }
880   CHKERRQ(MatDenseRestoreArrayRead(mdn->A,&av));
881   PetscFunctionReturn(0);
882 }
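
/*
   A minimal usage sketch: in parallel only NORM_1, NORM_FROBENIUS and
   NORM_INFINITY are available (the two norm is rejected above):

     PetscReal nrm;

     MatNorm(A,NORM_FROBENIUS,&nrm);   // sqrt of the global sum of |a_ij|^2
     MatNorm(A,NORM_1,&nrm);           // largest column sum, reduced over all ranks
     MatNorm(A,NORM_INFINITY,&nrm);    // largest row sum, reduced over all ranks
*/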
883 
884 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
885 {
886   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
887   Mat            B;
888   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
889   PetscInt       j,i,lda;
890   PetscScalar    *v;
891 
892   PetscFunctionBegin;
893   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
894     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&B));
895     CHKERRQ(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
896     CHKERRQ(MatSetType(B,((PetscObject)A)->type_name));
897     CHKERRQ(MatMPIDenseSetPreallocation(B,NULL));
898   } else B = *matout;
899 
900   m    = a->A->rmap->n; n = a->A->cmap->n;
901   CHKERRQ(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
902   CHKERRQ(MatDenseGetLDA(a->A,&lda));
903   CHKERRQ(PetscMalloc1(m,&rwork));
904   for (i=0; i<m; i++) rwork[i] = rstart + i;
905   for (j=0; j<n; j++) {
906     CHKERRQ(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
907     v   += lda;
908   }
909   CHKERRQ(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
910   CHKERRQ(PetscFree(rwork));
911   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
912   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
913   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
914     *matout = B;
915   } else {
916     CHKERRQ(MatHeaderMerge(A,&B));
917   }
918   PetscFunctionReturn(0);
919 }
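
/*
   A minimal usage sketch of the out-of-place transpose; the routine above
   also handles MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX (illustrative; error
   checking omitted):

     Mat At;

     MatTranspose(A,MAT_INITIAL_MATRIX,&At);   // allocates and fills At = A^T
     // ... use At ...
     MatDestroy(&At);
*/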
920 
921 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
922 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
923 
924 PetscErrorCode MatSetUp_MPIDense(Mat A)
925 {
926   PetscFunctionBegin;
927   CHKERRQ(PetscLayoutSetUp(A->rmap));
928   CHKERRQ(PetscLayoutSetUp(A->cmap));
929   if (!A->preallocated) {
930     CHKERRQ(MatMPIDenseSetPreallocation(A,NULL));
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
936 {
937   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
938 
939   PetscFunctionBegin;
940   CHKERRQ(MatAXPY(A->A,alpha,B->A,str));
941   PetscFunctionReturn(0);
942 }
943 
944 PetscErrorCode MatConjugate_MPIDense(Mat mat)
945 {
946   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
947 
948   PetscFunctionBegin;
949   CHKERRQ(MatConjugate(a->A));
950   PetscFunctionReturn(0);
951 }
952 
953 PetscErrorCode MatRealPart_MPIDense(Mat A)
954 {
955   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
956 
957   PetscFunctionBegin;
958   CHKERRQ(MatRealPart(a->A));
959   PetscFunctionReturn(0);
960 }
961 
962 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
963 {
964   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
965 
966   PetscFunctionBegin;
967   CHKERRQ(MatImaginaryPart(a->A));
968   PetscFunctionReturn(0);
969 }
970 
971 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
972 {
973   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
974 
975   PetscFunctionBegin;
976   PetscCheckFalse(!a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
977   PetscCheckFalse(!a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
978   CHKERRQ((*a->A->ops->getcolumnvector)(a->A,v,col));
979   PetscFunctionReturn(0);
980 }
981 
982 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
983 
984 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
985 {
986   PetscInt       i,m,n;
987   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
988   PetscReal      *work;
989 
990   PetscFunctionBegin;
991   CHKERRQ(MatGetSize(A,&m,&n));
992   CHKERRQ(PetscMalloc1(n,&work));
993   if (type == REDUCTION_MEAN_REALPART) {
994     CHKERRQ(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
995   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
996     CHKERRQ(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
997   } else {
998     CHKERRQ(MatGetColumnReductions_SeqDense(a->A,type,work));
999   }
1000   if (type == NORM_2) {
1001     for (i=0; i<n; i++) work[i] *= work[i];
1002   }
1003   if (type == NORM_INFINITY) {
1004     CHKERRMPI(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1005   } else {
1006     CHKERRMPI(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1007   }
1008   CHKERRQ(PetscFree(work));
1009   if (type == NORM_2) {
1010     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1011   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1012     for (i=0; i<n; i++) reductions[i] /= m;
1013   }
1014   PetscFunctionReturn(0);
1015 }
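
/*
   A minimal usage sketch: this kernel backs MatGetColumnNorms() and related
   column reductions; e.g. the 2-norm of every global column (illustrative;
   error checking omitted):

     PetscInt  N;
     PetscReal *norms;

     MatGetSize(A,NULL,&N);
     PetscMalloc1(N,&norms);
     MatGetColumnNorms(A,NORM_2,norms);   // all N values are returned on every rank
     PetscFree(norms);
*/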
1016 
1017 #if defined(PETSC_HAVE_CUDA)
1018 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1019 {
1020   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1021   PetscInt       lda;
1022 
1023   PetscFunctionBegin;
1024   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1025   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1026   if (!a->cvec) {
1027     CHKERRQ(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1028     CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1029   }
1030   a->vecinuse = col + 1;
1031   CHKERRQ(MatDenseGetLDA(a->A,&lda));
1032   CHKERRQ(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1033   CHKERRQ(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1034   *v   = a->cvec;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1039 {
1040   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1041 
1042   PetscFunctionBegin;
1043   PetscCheckFalse(!a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1044   PetscCheckFalse(!a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1045   a->vecinuse = 0;
1046   CHKERRQ(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1047   CHKERRQ(VecCUDAResetArray(a->cvec));
1048   *v   = NULL;
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1053 {
1054   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1055   PetscInt       lda;
1056 
1057   PetscFunctionBegin;
1058   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1059   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1060   if (!a->cvec) {
1061     CHKERRQ(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1062     CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1063   }
1064   a->vecinuse = col + 1;
1065   CHKERRQ(MatDenseGetLDA(a->A,&lda));
1066   CHKERRQ(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
1067   CHKERRQ(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1068   CHKERRQ(VecLockReadPush(a->cvec));
1069   *v   = a->cvec;
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1074 {
1075   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1076 
1077   PetscFunctionBegin;
1078   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1079   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1080   a->vecinuse = 0;
1081   CHKERRQ(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
1082   CHKERRQ(VecLockReadPop(a->cvec));
1083   CHKERRQ(VecCUDAResetArray(a->cvec));
1084   *v   = NULL;
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1089 {
1090   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1091   PetscInt       lda;
1092 
1093   PetscFunctionBegin;
1094   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1095   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1096   if (!a->cvec) {
1097     CHKERRQ(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1098     CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1099   }
1100   a->vecinuse = col + 1;
1101   CHKERRQ(MatDenseGetLDA(a->A,&lda));
1102   CHKERRQ(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1103   CHKERRQ(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1104   *v   = a->cvec;
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1109 {
1110   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1111 
1112   PetscFunctionBegin;
1113   PetscCheckFalse(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1114   PetscCheckFalse(!a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1115   a->vecinuse = 0;
1116   CHKERRQ(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1117   CHKERRQ(VecCUDAResetArray(a->cvec));
1118   *v   = NULL;
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1123 {
1124   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1125 
1126   PetscFunctionBegin;
1127   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1128   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1129   CHKERRQ(MatDenseCUDAPlaceArray(l->A,a));
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1134 {
1135   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1136 
1137   PetscFunctionBegin;
1138   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1139   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1140   CHKERRQ(MatDenseCUDAResetArray(l->A));
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1145 {
1146   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheckFalse(l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1150   PetscCheckFalse(l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1151   CHKERRQ(MatDenseCUDAReplaceArray(l->A,a));
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1156 {
1157   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1158 
1159   PetscFunctionBegin;
1160   CHKERRQ(MatDenseCUDAGetArrayWrite(l->A,a));
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1165 {
1166   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1167 
1168   PetscFunctionBegin;
1169   CHKERRQ(MatDenseCUDARestoreArrayWrite(l->A,a));
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1174 {
1175   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1176 
1177   PetscFunctionBegin;
1178   CHKERRQ(MatDenseCUDAGetArrayRead(l->A,a));
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1183 {
1184   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1185 
1186   PetscFunctionBegin;
1187   CHKERRQ(MatDenseCUDARestoreArrayRead(l->A,a));
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1192 {
1193   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1194 
1195   PetscFunctionBegin;
1196   CHKERRQ(MatDenseCUDAGetArray(l->A,a));
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1201 {
1202   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1203 
1204   PetscFunctionBegin;
1205   CHKERRQ(MatDenseCUDARestoreArray(l->A,a));
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1210 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1211 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1212 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1213 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1214 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1215 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1216 
1217 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1218 {
1219   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1220 
1221   PetscFunctionBegin;
1222   PetscCheckFalse(d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1223   PetscCheckFalse(d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1224   if (d->A) {
1225     CHKERRQ(MatBindToCPU(d->A,bind));
1226   }
1227   mat->boundtocpu = bind;
1228   if (!bind) {
1229     PetscBool iscuda;
1230 
1231     CHKERRQ(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
1232     if (!iscuda) {
1233       CHKERRQ(VecDestroy(&d->cvec));
1234     }
1235     CHKERRQ(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
1236     if (!iscuda) {
1237       CHKERRQ(MatDestroy(&d->cmat));
1238     }
1239     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
1240     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
1241     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
1242     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1243     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
1244     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1245   } else {
1246     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1247     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1248     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1249     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1250     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1251     CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1252   }
1253   if (d->cmat) {
1254     CHKERRQ(MatBindToCPU(d->cmat,bind));
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1260 {
1261   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1262   PetscBool      iscuda;
1263 
1264   PetscFunctionBegin;
1265   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1266   CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
1267   if (!iscuda) PetscFunctionReturn(0);
1268   CHKERRQ(PetscLayoutSetUp(A->rmap));
1269   CHKERRQ(PetscLayoutSetUp(A->cmap));
1270   if (!d->A) {
1271     CHKERRQ(MatCreate(PETSC_COMM_SELF,&d->A));
1272     CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
1273     CHKERRQ(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
1274   }
1275   CHKERRQ(MatSetType(d->A,MATSEQDENSECUDA));
1276   CHKERRQ(MatSeqDenseCUDASetPreallocation(d->A,d_data));
1277   A->preallocated = PETSC_TRUE;
1278   PetscFunctionReturn(0);
1279 }
1280 #endif
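
/*
   A minimal usage sketch (assumes a CUDA-enabled build): preallocate a
   MATMPIDENSECUDA matrix, optionally wrapping caller-provided device memory
   (illustrative; M and N are global sizes chosen by the caller, error
   checking omitted):

     Mat A;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATMPIDENSECUDA);
     MatMPIDenseCUDASetPreallocation(A,NULL);   // NULL: PETSc allocates the device array
*/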
1281 
1282 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1283 {
1284   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1285 
1286   PetscFunctionBegin;
1287   CHKERRQ(MatSetRandom(d->A,rctx));
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1292 {
1293   PetscFunctionBegin;
1294   *missing = PETSC_FALSE;
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1299 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1300 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1301 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1302 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1303 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1304 
1305 /* -------------------------------------------------------------------*/
1306 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1307                                         MatGetRow_MPIDense,
1308                                         MatRestoreRow_MPIDense,
1309                                         MatMult_MPIDense,
1310                                 /*  4*/ MatMultAdd_MPIDense,
1311                                         MatMultTranspose_MPIDense,
1312                                         MatMultTransposeAdd_MPIDense,
1313                                         NULL,
1314                                         NULL,
1315                                         NULL,
1316                                 /* 10*/ NULL,
1317                                         NULL,
1318                                         NULL,
1319                                         NULL,
1320                                         MatTranspose_MPIDense,
1321                                 /* 15*/ MatGetInfo_MPIDense,
1322                                         MatEqual_MPIDense,
1323                                         MatGetDiagonal_MPIDense,
1324                                         MatDiagonalScale_MPIDense,
1325                                         MatNorm_MPIDense,
1326                                 /* 20*/ MatAssemblyBegin_MPIDense,
1327                                         MatAssemblyEnd_MPIDense,
1328                                         MatSetOption_MPIDense,
1329                                         MatZeroEntries_MPIDense,
1330                                 /* 24*/ MatZeroRows_MPIDense,
1331                                         NULL,
1332                                         NULL,
1333                                         NULL,
1334                                         NULL,
1335                                 /* 29*/ MatSetUp_MPIDense,
1336                                         NULL,
1337                                         NULL,
1338                                         MatGetDiagonalBlock_MPIDense,
1339                                         NULL,
1340                                 /* 34*/ MatDuplicate_MPIDense,
1341                                         NULL,
1342                                         NULL,
1343                                         NULL,
1344                                         NULL,
1345                                 /* 39*/ MatAXPY_MPIDense,
1346                                         MatCreateSubMatrices_MPIDense,
1347                                         NULL,
1348                                         MatGetValues_MPIDense,
1349                                         MatCopy_MPIDense,
1350                                 /* 44*/ NULL,
1351                                         MatScale_MPIDense,
1352                                         MatShift_Basic,
1353                                         NULL,
1354                                         NULL,
1355                                 /* 49*/ MatSetRandom_MPIDense,
1356                                         NULL,
1357                                         NULL,
1358                                         NULL,
1359                                         NULL,
1360                                 /* 54*/ NULL,
1361                                         NULL,
1362                                         NULL,
1363                                         NULL,
1364                                         NULL,
1365                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1366                                         MatDestroy_MPIDense,
1367                                         MatView_MPIDense,
1368                                         NULL,
1369                                         NULL,
1370                                 /* 64*/ NULL,
1371                                         NULL,
1372                                         NULL,
1373                                         NULL,
1374                                         NULL,
1375                                 /* 69*/ NULL,
1376                                         NULL,
1377                                         NULL,
1378                                         NULL,
1379                                         NULL,
1380                                 /* 74*/ NULL,
1381                                         NULL,
1382                                         NULL,
1383                                         NULL,
1384                                         NULL,
1385                                 /* 79*/ NULL,
1386                                         NULL,
1387                                         NULL,
1388                                         NULL,
1389                                 /* 83*/ MatLoad_MPIDense,
1390                                         NULL,
1391                                         NULL,
1392                                         NULL,
1393                                         NULL,
1394                                         NULL,
1395                                 /* 89*/ NULL,
1396                                         NULL,
1397                                         NULL,
1398                                         NULL,
1399                                         NULL,
1400                                 /* 94*/ NULL,
1401                                         NULL,
1402                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1403                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1404                                         NULL,
1405                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1406                                         NULL,
1407                                         NULL,
1408                                         MatConjugate_MPIDense,
1409                                         NULL,
1410                                 /*104*/ NULL,
1411                                         MatRealPart_MPIDense,
1412                                         MatImaginaryPart_MPIDense,
1413                                         NULL,
1414                                         NULL,
1415                                 /*109*/ NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         MatGetColumnVector_MPIDense,
1419                                         MatMissingDiagonal_MPIDense,
1420                                 /*114*/ NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /*119*/ NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /*124*/ NULL,
1431                                         MatGetColumnReductions_MPIDense,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                 /*129*/ NULL,
1436                                         NULL,
1437                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1438                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1439                                         NULL,
1440                                 /*134*/ NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                 /*139*/ NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1451                                 /*145*/ NULL,
1452                                         NULL,
1453                                         NULL
1454 };
1455 
1456 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1457 {
1458   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1459   PetscBool      iscuda;
1460 
1461   PetscFunctionBegin;
1462   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1463   CHKERRQ(PetscLayoutSetUp(mat->rmap));
1464   CHKERRQ(PetscLayoutSetUp(mat->cmap));
1465   if (!a->A) {
1466     CHKERRQ(MatCreate(PETSC_COMM_SELF,&a->A));
1467     CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1468     CHKERRQ(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1469   }
1470   CHKERRQ(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1471   CHKERRQ(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1472   CHKERRQ(MatSeqDenseSetPreallocation(a->A,data));
1473   mat->preallocated = PETSC_TRUE;
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1478 {
1479   Mat            B,C;
1480 
1481   PetscFunctionBegin;
1482   CHKERRQ(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1483   CHKERRQ(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1484   CHKERRQ(MatDestroy(&C));
1485   if (reuse == MAT_REUSE_MATRIX) {
1486     C = *newmat;
1487   } else C = NULL;
1488   CHKERRQ(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1489   CHKERRQ(MatDestroy(&B));
1490   if (reuse == MAT_INPLACE_MATRIX) {
1491     CHKERRQ(MatHeaderReplace(A,&C));
1492   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1497 {
1498   Mat            B,C;
1499 
1500   PetscFunctionBegin;
1501   CHKERRQ(MatDenseGetLocalMatrix(A,&C));
1502   CHKERRQ(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1503   if (reuse == MAT_REUSE_MATRIX) {
1504     C = *newmat;
1505   } else C = NULL;
1506   CHKERRQ(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1507   CHKERRQ(MatDestroy(&B));
1508   if (reuse == MAT_INPLACE_MATRIX) {
1509     CHKERRQ(MatHeaderReplace(A,&C));
1510   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #if defined(PETSC_HAVE_ELEMENTAL)
1515 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1516 {
1517   Mat            mat_elemental;
1518   PetscScalar    *v;
1519   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1520 
1521   PetscFunctionBegin;
1522   if (reuse == MAT_REUSE_MATRIX) {
1523     mat_elemental = *newmat;
1524     CHKERRQ(MatZeroEntries(*newmat));
1525   } else {
1526     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1527     CHKERRQ(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1528     CHKERRQ(MatSetType(mat_elemental,MATELEMENTAL));
1529     CHKERRQ(MatSetUp(mat_elemental));
1530     CHKERRQ(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1531   }
1532 
1533   CHKERRQ(PetscMalloc2(m,&rows,N,&cols));
1534   for (i=0; i<N; i++) cols[i] = i;
1535   for (i=0; i<m; i++) rows[i] = rstart + i;
1536 
1537   /* The PETSc-Elemental interface uses axpy for setting off-process entries, so only ADD_VALUES is allowed */
1538   CHKERRQ(MatDenseGetArray(A,&v));
1539   CHKERRQ(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1540   CHKERRQ(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1541   CHKERRQ(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1542   CHKERRQ(MatDenseRestoreArray(A,&v));
1543   CHKERRQ(PetscFree2(rows,cols));
1544 
1545   if (reuse == MAT_INPLACE_MATRIX) {
1546     CHKERRQ(MatHeaderReplace(A,&mat_elemental));
1547   } else {
1548     *newmat = mat_elemental;
1549   }
1550   PetscFunctionReturn(0);
1551 }
1552 #endif
1553 
1554 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1555 {
1556   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1557 
1558   PetscFunctionBegin;
1559   CHKERRQ(MatDenseGetColumn(mat->A,col,vals));
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1564 {
1565   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1566 
1567   PetscFunctionBegin;
1568   CHKERRQ(MatDenseRestoreColumn(mat->A,vals));
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1573 {
1574   Mat_MPIDense   *mat;
1575   PetscInt       m,nloc,N;
1576 
1577   PetscFunctionBegin;
1578   CHKERRQ(MatGetSize(inmat,&m,&N));
1579   CHKERRQ(MatGetLocalSize(inmat,NULL,&nloc));
1580   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1581     PetscInt sum;
1582 
1583     if (n == PETSC_DECIDE) {
1584       CHKERRQ(PetscSplitOwnership(comm,&n,&N));
1585     }
1586     /* Check sum(n) = N */
1587     CHKERRMPI(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1588     PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1589 
1590     CHKERRQ(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1591     CHKERRQ(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1592   }
1593 
1594   /* numeric phase */
1595   mat = (Mat_MPIDense*)(*outmat)->data;
1596   CHKERRQ(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1597   CHKERRQ(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
1598   CHKERRQ(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
1599   PetscFunctionReturn(0);
1600 }
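
/*
   Example usage of the routine above through its public interface (a minimal
   sketch, not part of the library; mloc and N are assumed local row and
   global column counts): each rank builds a sequential dense block of rows
   and the blocks are concatenated into one parallel matrix.

     Mat seq,mpi;
     MatCreateSeqDense(PETSC_COMM_SELF,mloc,N,NULL,&seq);
     ... fill seq ...
     MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpi);
*/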
1601 
1602 #if defined(PETSC_HAVE_CUDA)
1603 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1604 {
1605   Mat            B;
1606   Mat_MPIDense   *m;
1607 
1608   PetscFunctionBegin;
1609   if (reuse == MAT_INITIAL_MATRIX) {
1610     CHKERRQ(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1611   } else if (reuse == MAT_REUSE_MATRIX) {
1612     CHKERRQ(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1613   }
1614 
1615   B    = *newmat;
1616   CHKERRQ(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1617   CHKERRQ(PetscFree(B->defaultvectype));
1618   CHKERRQ(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1619   CHKERRQ(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1620   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1621   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1622   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1623   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1624   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1625   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1626   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1627   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1628   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1629   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1630   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1631   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1632   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1633   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1634   m    = (Mat_MPIDense*)(B)->data;
1635   if (m->A) {
1636     CHKERRQ(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1637     CHKERRQ(MatSetUpMultiply_MPIDense(B));
1638   }
1639   B->ops->bindtocpu = NULL;
1640   B->offloadmask    = PETSC_OFFLOAD_CPU;
1641   PetscFunctionReturn(0);
1642 }
1643 
1644 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1645 {
1646   Mat            B;
1647   Mat_MPIDense   *m;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_INITIAL_MATRIX) {
1651     CHKERRQ(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1652   } else if (reuse == MAT_REUSE_MATRIX) {
1653     CHKERRQ(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1654   }
1655 
1656   B    = *newmat;
1657   CHKERRQ(PetscFree(B->defaultvectype));
1658   CHKERRQ(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1659   CHKERRQ(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1660   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1661   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1662   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1663   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1664   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1665   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1666   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1667   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1668   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1669   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1670   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1671   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1672   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1673   CHKERRQ(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1674   m    = (Mat_MPIDense*)(B->data);
1675   if (m->A) {
1676     CHKERRQ(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1677     CHKERRQ(MatSetUpMultiply_MPIDense(B));
1678     B->offloadmask = PETSC_OFFLOAD_BOTH;
1679   } else {
1680     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1681   }
1682   CHKERRQ(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1683 
1684   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1685   PetscFunctionReturn(0);
1686 }
1687 #endif
1688 
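/*
   The MatDenseGetColumnVec*_MPIDense routines below lend out a single shared
   work vector (a->cvec) whose array is placed over one column of the local
   storage, so only one column may be borrowed at a time. Typical usage
   through the public interface (a minimal sketch):

     Vec v;
     MatDenseGetColumnVec(A,col,&v);
     ... read/write column col of A through v ...
     MatDenseRestoreColumnVec(A,col,&v);
*/
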
1689 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1690 {
1691   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1692   PetscInt       lda;
1693 
1694   PetscFunctionBegin;
1695   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1696   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1697   if (!a->cvec) {
1698     CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1699   }
1700   a->vecinuse = col + 1;
1701   CHKERRQ(MatDenseGetLDA(a->A,&lda));
1702   CHKERRQ(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1703   CHKERRQ(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1704   *v   = a->cvec;
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1709 {
1710   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1711 
1712   PetscFunctionBegin;
1713   PetscCheckFalse(!a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1714   PetscCheckFalse(!a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1715   a->vecinuse = 0;
1716   CHKERRQ(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1717   CHKERRQ(VecResetArray(a->cvec));
1718   *v   = NULL;
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1723 {
1724   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1725   PetscInt       lda;
1726 
1727   PetscFunctionBegin;
1728   PetscCheckFalse(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1729   PetscCheckFalse(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1730   if (!a->cvec) {
1731     CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1732   }
1733   a->vecinuse = col + 1;
1734   CHKERRQ(MatDenseGetLDA(a->A,&lda));
1735   CHKERRQ(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1736   CHKERRQ(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1737   CHKERRQ(VecLockReadPush(a->cvec));
1738   *v   = a->cvec;
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1743 {
1744   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1745 
1746   PetscFunctionBegin;
1747   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
1748   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1749   a->vecinuse = 0;
1750   CHKERRQ(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1751   CHKERRQ(VecLockReadPop(a->cvec));
1752   CHKERRQ(VecResetArray(a->cvec));
1753   *v   = NULL;
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1758 {
1759   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1760   PetscInt       lda;
1761 
1762   PetscFunctionBegin;
1763   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1764   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1765   if (!a->cvec) {
1766     CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1767   }
1768   a->vecinuse = col + 1;
1769   CHKERRQ(MatDenseGetLDA(a->A,&lda));
1770   CHKERRQ(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1771   CHKERRQ(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1772   *v   = a->cvec;
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1777 {
1778   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1779 
1780   PetscFunctionBegin;
1781   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
1782   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1783   a->vecinuse = 0;
1784   CHKERRQ(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1785   CHKERRQ(VecResetArray(a->cvec));
1786   *v   = NULL;
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1791 {
1792   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1793   Mat_MPIDense   *c;
1794   MPI_Comm       comm;
1795   PetscBool      setup = PETSC_FALSE;
1796 
1797   PetscFunctionBegin;
1798   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
1799   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1800   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1801   if (!a->cmat) {
1802     setup = PETSC_TRUE;
1803     CHKERRQ(MatCreate(comm,&a->cmat));
1804     CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1805     CHKERRQ(MatSetType(a->cmat,((PetscObject)A)->type_name));
1806     CHKERRQ(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1807     CHKERRQ(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1808     CHKERRQ(PetscLayoutSetUp(a->cmat->cmap));
1809   } else if (cend-cbegin != a->cmat->cmap->N) {
1810     setup = PETSC_TRUE;
1811     CHKERRQ(PetscLayoutDestroy(&a->cmat->cmap));
1812     CHKERRQ(PetscLayoutCreate(comm,&a->cmat->cmap));
1813     CHKERRQ(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1814     CHKERRQ(PetscLayoutSetUp(a->cmat->cmap));
1815   }
1816   c = (Mat_MPIDense*)a->cmat->data;
1817   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1818   CHKERRQ(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
1819   if (setup) { /* do we really need this? */
1820     CHKERRQ(MatSetUpMultiply_MPIDense(a->cmat));
1821   }
1822   a->cmat->preallocated = PETSC_TRUE;
1823   a->cmat->assembled = PETSC_TRUE;
1824   a->matinuse = cbegin + 1;
1825   *v = a->cmat;
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1830 {
1831   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1832   Mat_MPIDense *c;
1833 
1834   PetscFunctionBegin;
1835   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1836   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1837   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1838   a->matinuse = 0;
1839   c    = (Mat_MPIDense*)a->cmat->data;
1840   CHKERRQ(MatDenseRestoreSubMatrix(a->A,&c->A));
1841   *v   = NULL;
1842   PetscFunctionReturn(0);
1843 }
1844 
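/*
   The sub-matrix handed out by MatDenseGetSubMatrix() above shares storage
   with its parent, so it must be restored before the parent's storage is
   accessed again. Typical usage through the public interface (a minimal
   sketch):

     Mat sub;
     MatDenseGetSubMatrix(A,cbegin,cend,&sub);
     ... operate on columns [cbegin,cend) of A through sub ...
     MatDenseRestoreSubMatrix(A,&sub);
*/
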
1845 /*MC
1846    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1847 
1848    Options Database Keys:
1849 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1850 
1851   Level: beginner
1852 
1853 .seealso: MatCreateDense()
1854 
1855 M*/
1856 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1857 {
1858   Mat_MPIDense   *a;
1859 
1860   PetscFunctionBegin;
1861   CHKERRQ(PetscNewLog(mat,&a));
1862   mat->data = (void*)a;
1863   CHKERRQ(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1864 
1865   mat->insertmode = NOT_SET_VALUES;
1866 
1867   /* build stash used to cache off-process entries set during assembly */
1868   a->donotstash = PETSC_FALSE;
1869 
1870   CHKERRQ(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1871 
1872   /* data structures used for matrix-vector multiply */
1873   a->lvec        = NULL;
1874   a->Mvctx       = NULL;
1875   a->roworiented = PETSC_TRUE;
1876 
1877   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1878   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1879   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1880   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1881   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1882   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1883   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1884   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1885   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1886   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1887   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1888   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1889   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1890   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1891   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1892   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1893   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1894   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1895   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1896   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1897   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1898 #if defined(PETSC_HAVE_ELEMENTAL)
1899   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1900 #endif
1901 #if defined(PETSC_HAVE_SCALAPACK)
1902   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1903 #endif
1904 #if defined(PETSC_HAVE_CUDA)
1905   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1906 #endif
1907   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1908   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1909   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1910 #if defined(PETSC_HAVE_CUDA)
1911   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1912   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1913 #endif
1914 
1915   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1916   CHKERRQ(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1917   CHKERRQ(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 /*MC
1922    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1923 
1924    Options Database Keys:
1925 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1926 
1927   Level: beginner
1928 
1929 .seealso: MATMPIDENSE, MATDENSECUDA, MatCreateDenseCUDA()
1930 
1931 M*/
1932 #if defined(PETSC_HAVE_CUDA)
1933 #include <petsc/private/deviceimpl.h>
1934 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1935 {
1936   PetscFunctionBegin;
1937   CHKERRQ(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
1938   CHKERRQ(MatCreate_MPIDense(B));
1939   CHKERRQ(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
1940   PetscFunctionReturn(0);
1941 }
1942 #endif
1943 
1944 /*MC
1945    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1946 
1947    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1948    and MATMPIDENSE otherwise.
1949 
1950    Options Database Keys:
1951 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1952 
1953   Level: beginner
1954 
1955 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1956 M*/
1957 
1958 /*MC
1959    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1960 
1961    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1962    and MATMPIDENSECUDA otherwise.
1963 
1964    Options Database Keys:
1965 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1966 
1967   Level: beginner
1968 
1969 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1970 M*/
1971 
1972 /*@C
1973    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1974 
1975    Collective
1976 
1977    Input Parameters:
1978 +  B - the matrix
1979 -  data - optional location of matrix data.  Set data=NULL for PETSc
1980    to control all matrix memory allocation.
1981 
1982    Notes:
1983    The dense format is fully compatible with standard Fortran 77
1984    storage by columns.
1985 
1986    The data input variable is intended primarily for Fortran programmers
1987    who wish to allocate their own matrix memory space.  Most users should
1988    set data=NULL.
1989 
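   Example usage (a minimal sketch; comm, m, n, M, and N are assumed to be
   set up by the caller):
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,M,N);
   MatSetType(B,MATMPIDENSE);
   MatMPIDenseSetPreallocation(B,NULL);
.ve
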
1990    Level: intermediate
1991 
1992 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1993 @*/
1994 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1995 {
1996   PetscFunctionBegin;
1997   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1998   CHKERRQ(PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)));
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 /*@
2003    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2004    array provided by the user. This is useful to avoid copying an array
2005    into a matrix.
2006 
2007    Not Collective
2008 
2009    Input Parameters:
2010 +  mat - the matrix
2011 -  array - the array in column major order
2012 
2013    Notes:
2014    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2015    freed when the matrix is destroyed.
2016 
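   Example usage (a minimal sketch; myarray is an assumed user-managed buffer
   of the correct local size, in column major order):
.vb
   MatDensePlaceArray(mat,myarray);
   ... operations using the placed storage ...
   MatDenseResetArray(mat);
.ve
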
2017    Level: developer
2018 
2019 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2020 
2021 @*/
2022 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2023 {
2024   PetscFunctionBegin;
2025   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2026   CHKERRQ(PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)));
2027   CHKERRQ(PetscObjectStateIncrease((PetscObject)mat));
2028 #if defined(PETSC_HAVE_CUDA)
2029   mat->offloadmask = PETSC_OFFLOAD_CPU;
2030 #endif
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 /*@
2035    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
2036 
2037    Not Collective
2038 
2039    Input Parameter:
2040 .  mat - the matrix
2041 
2042    Notes:
2043    You can only call this after a call to MatDensePlaceArray()
2044 
2045    Level: developer
2046 
2047 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2048 
2049 @*/
2050 PetscErrorCode  MatDenseResetArray(Mat mat)
2051 {
2052   PetscFunctionBegin;
2053   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2054   CHKERRQ(PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)));
2055   CHKERRQ(PetscObjectStateIncrease((PetscObject)mat));
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 /*@
2060    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2061    array provided by the user. This is useful to avoid copying an array
2062    into a matrix.
2063 
2064    Not Collective
2065 
2066    Input Parameters:
2067 +  mat - the matrix
2068 -  array - the array in column major order
2069 
2070    Notes:
2071    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2072    freed by the user. It will be freed when the matrix is destroyed.
2073 
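   Example usage (a minimal sketch; lda and n are the assumed local leading
   dimension and number of columns):
.vb
   PetscScalar *newarray;
   PetscMalloc1(lda*n,&newarray);
   MatDenseReplaceArray(mat,newarray); /* ownership of newarray passes to mat */
.ve
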
2074    Level: developer
2075 
2076 .seealso: MatDenseGetArray(), VecReplaceArray()
2077 @*/
2078 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2079 {
2080   PetscFunctionBegin;
2081   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2082   CHKERRQ(PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)));
2083   CHKERRQ(PetscObjectStateIncrease((PetscObject)mat));
2084 #if defined(PETSC_HAVE_CUDA)
2085   mat->offloadmask = PETSC_OFFLOAD_CPU;
2086 #endif
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #if defined(PETSC_HAVE_CUDA)
2091 /*@C
2092    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2093    array provided by the user. This is useful to avoid copying an array
2094    into a matrix.
2095 
2096    Not Collective
2097 
2098    Input Parameters:
2099 +  mat - the matrix
2100 -  array - the array in column major order
2101 
2102    Notes:
2103    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2104    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2105 
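   Example usage (a minimal sketch; d_array is an assumed device buffer
   obtained with cudaMalloc()):
.vb
   MatDenseCUDAPlaceArray(mat,d_array);
   ... GPU operations using the placed storage ...
   MatDenseCUDAResetArray(mat);
.ve
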
2106    Level: developer
2107 
2108 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2109 @*/
2110 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2111 {
2112   PetscFunctionBegin;
2113   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2114   CHKERRQ(PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)));
2115   CHKERRQ(PetscObjectStateIncrease((PetscObject)mat));
2116   mat->offloadmask = PETSC_OFFLOAD_GPU;
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 /*@C
2121    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2122 
2123    Not Collective
2124 
2125    Input Parameter:
2126 .  mat - the matrix
2127 
2128    Notes:
2129    You can only call this after a call to MatDenseCUDAPlaceArray()
2130 
2131    Level: developer
2132 
2133 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2134 
2135 @*/
2136 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2137 {
2138   PetscFunctionBegin;
2139   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2140   CHKERRQ(PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)));
2141   CHKERRQ(PetscObjectStateIncrease((PetscObject)mat));
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 /*@C
2146    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2147    array provided by the user. This is useful to avoid copying an array
2148    into a matrix.
2149 
2150    Not Collective
2151 
2152    Input Parameters:
2153 +  mat - the matrix
2154 -  array - the array in column major order
2155 
2156    Notes:
2157    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2158    The memory passed in CANNOT be freed by the user. It will be freed
2159    when the matrix is destroyed. The array should respect the matrix leading dimension.
2160 
2161    Level: developer
2162 
2163 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2164 @*/
2165 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2166 {
2167   PetscFunctionBegin;
2168   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2169   CHKERRQ(PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)));
2170   CHKERRQ(PetscObjectStateIncrease((PetscObject)mat));
2171   mat->offloadmask = PETSC_OFFLOAD_GPU;
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 /*@C
2176    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2177 
2178    Not Collective
2179 
2180    Input Parameter:
2181 .  A - the matrix
2182 
2183    Output Parameter:
2184 .  array - the GPU array in column major order
2185 
2186    Notes:
2187    The data on the GPU may be out of date with respect to changes made on the CPU, since no copy to the GPU is performed. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2188 
2189    Level: developer
2190 
2191 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2192 @*/
2193 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2194 {
2195   PetscFunctionBegin;
2196   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2197   CHKERRQ(PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)));
2198   CHKERRQ(PetscObjectStateIncrease((PetscObject)A));
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 /*@C
2203    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2204 
2205    Not Collective
2206 
2207    Input Parameters:
2208 +  A - the matrix
2209 -  array - the GPU array in column major order
2210 
2211    Notes:
2212    The pointer obtained with MatDenseCUDAGetArrayWrite() is no longer valid after this call.

2213    Level: developer
2214 
2215 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2216 @*/
2217 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2218 {
2219   PetscFunctionBegin;
2220   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2221   CHKERRQ(PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)));
2222   CHKERRQ(PetscObjectStateIncrease((PetscObject)A));
2223   A->offloadmask = PETSC_OFFLOAD_GPU;
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 /*@C
2228    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2229 
2230    Not Collective
2231 
2232    Input Parameter:
2233 .  A - the matrix
2234 
2235    Output Parameter:
2236 .  array - the GPU array in column major order
2237 
2238    Notes:
2239    Data may be copied to the GPU as part of this call if the matrix was previously updated on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2240 
2241    Level: developer
2242 
2243 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2244 @*/
2245 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2246 {
2247   PetscFunctionBegin;
2248   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2249   CHKERRQ(PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a)));
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 /*@C
2254    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2255 
2256    Not Collective
2257 
2258    Input Parameters:
2259 +  A - the matrix
2260 -  array - the GPU array in column major order
2261 
2262    Notes:
2263    The pointer obtained with MatDenseCUDAGetArrayRead() is no longer valid after this call.
2264 
2265    Level: developer
2266 
2267 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2268 @*/
2269 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2270 {
2271   PetscFunctionBegin;
2272   CHKERRQ(PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)));
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 /*@C
2277    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2278 
2279    Not Collective
2280 
2281    Input Parameter:
2282 .  A - the matrix
2283 
2284    Output Parameter:
2285 .  array - the GPU array in column major order
2286 
2287    Notes:
2288    Data may be copied to the GPU as part of this call if the matrix was previously updated on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite(); for read-only access, use MatDenseCUDAGetArrayRead().
2289 
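   Example usage (a minimal sketch; the pointer can be handed to a kernel or
   a cuBLAS call operating on the local block):
.vb
   PetscScalar *d_a;
   MatDenseCUDAGetArray(A,&d_a);
   ... launch work using d_a ...
   MatDenseCUDARestoreArray(A,&d_a);
.ve
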
2290    Level: developer
2291 
2292 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2293 @*/
2294 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2295 {
2296   PetscFunctionBegin;
2297   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2298   CHKERRQ(PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)));
2299   CHKERRQ(PetscObjectStateIncrease((PetscObject)A));
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 /*@C
2304    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2305 
2306    Not Collective
2307 
2308    Input Parameters:
2309 +  A - the matrix
2310 -  array - the GPU array in column major order
2311 
2312    Notes:
2313    The pointer obtained with MatDenseCUDAGetArray() is no longer valid after this call.

2314    Level: developer
2315 
2316 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2317 @*/
2318 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2319 {
2320   PetscFunctionBegin;
2321   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2322   CHKERRQ(PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)));
2323   CHKERRQ(PetscObjectStateIncrease((PetscObject)A));
2324   A->offloadmask = PETSC_OFFLOAD_GPU;
2325   PetscFunctionReturn(0);
2326 }
2327 #endif
2328 
2329 /*@C
2330    MatCreateDense - Creates a matrix in dense format.
2331 
2332    Collective
2333 
2334    Input Parameters:
2335 +  comm - MPI communicator
2336 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2337 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2338 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2339 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2340 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2341    to control all matrix memory allocation.
2342 
2343    Output Parameter:
2344 .  A - the matrix
2345 
2346    Notes:
2347    The dense format is fully compatible with standard Fortran 77
2348    storage by columns.
2349    Note that, although local portions of the matrix are stored in column-major
2350    order, the matrix is partitioned across MPI ranks by row.
2351 
2352    The data input variable is intended primarily for Fortran programmers
2353    who wish to allocate their own matrix memory space.  Most users should
2354    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2355 
2356    The user MUST specify either the local or global matrix dimensions
2357    (possibly both).
2358 
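   Example usage (a minimal sketch creating a 100 x 50 matrix with
   PETSc-managed storage):
.vb
   Mat A;
   MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);
   ... set values, use A ...
   MatDestroy(&A);
.ve
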
2359    Level: intermediate
2360 
2361 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2362 @*/
2363 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2364 {
2365   PetscMPIInt    size;
2366 
2367   PetscFunctionBegin;
2368   CHKERRQ(MatCreate(comm,A));
2369   CHKERRQ(MatSetSizes(*A,m,n,M,N));
2370   CHKERRMPI(MPI_Comm_size(comm,&size));
2371   if (size > 1) {
2372     PetscBool havedata = (PetscBool)!!data;
2373 
2374     CHKERRQ(MatSetType(*A,MATMPIDENSE));
2375     CHKERRQ(MatMPIDenseSetPreallocation(*A,data));
2376     CHKERRMPI(MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm));
2377     if (havedata) {  /* user provided data array, so no need to assemble */
2378       CHKERRQ(MatSetUpMultiply_MPIDense(*A));
2379       (*A)->assembled = PETSC_TRUE;
2380     }
2381   } else {
2382     CHKERRQ(MatSetType(*A,MATSEQDENSE));
2383     CHKERRQ(MatSeqDenseSetPreallocation(*A,data));
2384   }
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #if defined(PETSC_HAVE_CUDA)
2389 /*@C
2390    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2391 
2392    Collective
2393 
2394    Input Parameters:
2395 +  comm - MPI communicator
2396 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2397 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2398 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2399 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2400 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2401    to control matrix memory allocation.
2402 
2403    Output Parameter:
2404 .  A - the matrix
2405 
2406    Notes:
2407    If a data array is passed in, it must be valid GPU memory (e.g., allocated with cudaMalloc()); most users should set data=NULL so that PETSc controls all matrix memory allocation.
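
   Example usage (a minimal sketch):
.vb
   Mat A;
   MatCreateDenseCUDA(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A);
.ve
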
2408    Level: intermediate
2409 
2410 .seealso: MatCreate(), MatCreateDense()
2411 @*/
2412 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2413 {
2414   PetscMPIInt    size;
2415 
2416   PetscFunctionBegin;
2417   CHKERRQ(MatCreate(comm,A));
2418   PetscValidLogicalCollectiveBool(*A,!!data,6);
2419   CHKERRQ(MatSetSizes(*A,m,n,M,N));
2420   CHKERRMPI(MPI_Comm_size(comm,&size));
2421   if (size > 1) {
2422     CHKERRQ(MatSetType(*A,MATMPIDENSECUDA));
2423     CHKERRQ(MatMPIDenseCUDASetPreallocation(*A,data));
2424     if (data) {  /* user provided data array, so no need to assemble */
2425       CHKERRQ(MatSetUpMultiply_MPIDense(*A));
2426       (*A)->assembled = PETSC_TRUE;
2427     }
2428   } else {
2429     CHKERRQ(MatSetType(*A,MATSEQDENSECUDA));
2430     CHKERRQ(MatSeqDenseCUDASetPreallocation(*A,data));
2431   }
2432   PetscFunctionReturn(0);
2433 }
2434 #endif
2435 
2436 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2437 {
2438   Mat            mat;
2439   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2440 
2441   PetscFunctionBegin;
2442   *newmat = NULL;
2443   CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2444   CHKERRQ(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2445   CHKERRQ(MatSetType(mat,((PetscObject)A)->type_name));
2446   a       = (Mat_MPIDense*)mat->data;
2447 
2448   mat->factortype   = A->factortype;
2449   mat->assembled    = PETSC_TRUE;
2450   mat->preallocated = PETSC_TRUE;
2451 
2452   mat->insertmode = NOT_SET_VALUES;
2453   a->donotstash   = oldmat->donotstash;
2454 
2455   CHKERRQ(PetscLayoutReference(A->rmap,&mat->rmap));
2456   CHKERRQ(PetscLayoutReference(A->cmap,&mat->cmap));
2457 
2458   CHKERRQ(MatDuplicate(oldmat->A,cpvalues,&a->A));
2459   CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2460   CHKERRQ(MatSetUpMultiply_MPIDense(mat));
2461 
2462   *newmat = mat;
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2467 {
2468   PetscBool      isbinary;
2469 #if defined(PETSC_HAVE_HDF5)
2470   PetscBool      ishdf5;
2471 #endif
2472 
2473   PetscFunctionBegin;
2474   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2475   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2476   /* force binary viewer to load .info file if it has not yet done so */
2477   CHKERRQ(PetscViewerSetUp(viewer));
2478   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2479 #if defined(PETSC_HAVE_HDF5)
2480   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2481 #endif
2482   if (isbinary) {
2483     CHKERRQ(MatLoad_Dense_Binary(newMat,viewer));
2484 #if defined(PETSC_HAVE_HDF5)
2485   } else if (ishdf5) {
2486     CHKERRQ(MatLoad_Dense_HDF5(newMat,viewer));
2487 #endif
2488   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2489   PetscFunctionReturn(0);
2490 }
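
/*
   Example usage of MatLoad() with a MATMPIDENSE matrix (a minimal sketch;
   "matrix.dat" is an assumed file previously written with MatView() and a
   binary viewer):

     Mat         A;
     PetscViewer viewer;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIDENSE);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/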
2491 
2492 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2493 {
2494   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2495   Mat            a,b;
2496   PetscBool      flg;
2497 
2498   PetscFunctionBegin;
2499   a    = matA->A;
2500   b    = matB->A;
2501   CHKERRQ(MatEqual(a,b,&flg));
2502   CHKERRMPI(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2507 {
2508   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2509 
2510   PetscFunctionBegin;
2511   CHKERRQ(PetscFree2(atb->sendbuf,atb->recvcounts));
2512   CHKERRQ(MatDestroy(&atb->atb));
2513   CHKERRQ(PetscFree(atb));
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2518 {
2519   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2520 
2521   PetscFunctionBegin;
2522   CHKERRQ(PetscFree2(abt->buf[0],abt->buf[1]));
2523   CHKERRQ(PetscFree2(abt->recvcounts,abt->recvdispls));
2524   CHKERRQ(PetscFree(abt));
2525   PetscFunctionReturn(0);
2526 }
2527 
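/*
   C = A^T*B is formed below by computing the full local product aseq^T * bseq
   on each rank; the row blocks of C are then summed and delivered to their
   owners with a single MPI_Reduce_scatter().
*/
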
2528 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2529 {
2530   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2531   Mat_TransMatMultDense *atb;
2532   MPI_Comm              comm;
2533   PetscMPIInt           size,*recvcounts;
2534   PetscScalar           *carray,*sendbuf;
2535   const PetscScalar     *atbarray;
2536   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2537   const PetscInt        *ranges;
2538 
2539   PetscFunctionBegin;
2540   MatCheckProduct(C,3);
2541   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2542   atb = (Mat_TransMatMultDense *)C->product->data;
2543   recvcounts = atb->recvcounts;
2544   sendbuf = atb->sendbuf;
2545 
2546   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
2547   CHKERRMPI(MPI_Comm_size(comm,&size));
2548 
2549   /* compute atbarray = aseq^T * bseq */
2550   CHKERRQ(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2551 
2552   CHKERRQ(MatGetOwnershipRanges(C,&ranges));
2553 
2554   /* arrange atbarray into sendbuf */
2555   CHKERRQ(MatDenseGetArrayRead(atb->atb,&atbarray));
2556   CHKERRQ(MatDenseGetLDA(atb->atb,&lda));
2557   for (proc=0, k=0; proc<size; proc++) {
2558     for (j=0; j<cN; j++) {
2559       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2560     }
2561   }
2562   CHKERRQ(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2563 
2564   /* sum all atbarray to local values of C */
2565   CHKERRQ(MatDenseGetArrayWrite(c->A,&carray));
2566   CHKERRMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2567   CHKERRQ(MatDenseRestoreArrayWrite(c->A,&carray));
2568   CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2569   CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2574 {
2575   MPI_Comm              comm;
2576   PetscMPIInt           size;
2577   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2578   Mat_TransMatMultDense *atb;
2579   PetscBool             cisdense;
2580   PetscInt              i;
2581   const PetscInt        *ranges;
2582 
2583   PetscFunctionBegin;
2584   MatCheckProduct(C,3);
2585   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2586   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
2587   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2588     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ",%" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2589   }
2590 
2591   /* create matrix product C */
2592   CHKERRQ(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2593   CHKERRQ(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2594   if (!cisdense) {
2595     CHKERRQ(MatSetType(C,((PetscObject)A)->type_name));
2596   }
2597   CHKERRQ(MatSetUp(C));
2598 
2599   /* create data structure for reuse C */
2600   CHKERRMPI(MPI_Comm_size(comm,&size));
2601   CHKERRQ(PetscNew(&atb));
2602   cM   = C->rmap->N;
2603   CHKERRQ(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2604   CHKERRQ(MatGetOwnershipRanges(C,&ranges));
2605   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2606 
2607   C->product->data    = atb;
2608   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2613 {
2614   MPI_Comm              comm;
2615   PetscMPIInt           i, size;
2616   PetscInt              maxRows, bufsiz;
2617   PetscMPIInt           tag;
2618   PetscInt              alg;
2619   Mat_MatTransMultDense *abt;
2620   Mat_Product           *product = C->product;
2621   PetscBool             flg;
2622 
2623   PetscFunctionBegin;
2624   MatCheckProduct(C,4);
2625   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2626   /* check local size of A and B */
2627   PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);
2628 
2629   CHKERRQ(PetscStrcmp(product->alg,"allgatherv",&flg));
2630   alg  = flg ? 0 : 1;
2631 
2632   /* setup matrix product C */
2633   CHKERRQ(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
2634   CHKERRQ(MatSetType(C,MATMPIDENSE));
2635   CHKERRQ(MatSetUp(C));
2636   CHKERRQ(PetscObjectGetNewTag((PetscObject)C,&tag));
2637 
2638   /* create data structure for reusing C */
2639   CHKERRQ(PetscObjectGetComm((PetscObject)C,&comm));
2640   CHKERRMPI(MPI_Comm_size(comm,&size));
2641   CHKERRQ(PetscNew(&abt));
2642   abt->tag = tag;
2643   abt->alg = alg;
2644   switch (alg) {
2645   case 1: /* alg: "cyclic" */
2646     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2647     bufsiz = A->cmap->N * maxRows;
2648     CHKERRQ(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
2649     break;
2650   default: /* alg: "allgatherv" */
2651     CHKERRQ(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2652     CHKERRQ(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
2653     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2654     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2655     break;
2656   }
2657 
2658   C->product->data    = abt;
2659   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2660   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2661   PetscFunctionReturn(0);
2662 }
2663 
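/*
   Sketch (kept out of compilation) of driving the C = A*B^T implementation above
   through the MatProduct interface; "cyclic" selects alg = 1 and "allgatherv" (the
   default) selects alg = 0. The matrix sizes are assumptions made for the example.
*/
#if 0
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat            A,B,C;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  /* A and B must have the same local column layout */
  CHKERRQ(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,40,60,NULL,&A));
  CHKERRQ(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,50,60,NULL,&B));
  CHKERRQ(MatSetRandom(A,NULL));
  CHKERRQ(MatSetRandom(B,NULL));
  CHKERRQ(MatProductCreate(A,B,NULL,&C));
  CHKERRQ(MatProductSetType(C,MATPRODUCT_ABt));
  CHKERRQ(MatProductSetAlgorithm(C,"cyclic"));   /* or "allgatherv" */
  CHKERRQ(MatProductSetFromOptions(C));          /* dispatches to MatProductSetFromOptions_MPIDense_ABt() */
  CHKERRQ(MatProductSymbolic(C));                /* allocates abt->buf[] (and recvcounts/recvdispls) */
  CHKERRQ(MatProductNumeric(C));                 /* 40x50 result */
  CHKERRQ(MatDestroy(&C));
  CHKERRQ(MatDestroy(&B));
  CHKERRQ(MatDestroy(&A));
  ierr = PetscFinalize();
  return ierr;
}
#endif
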
2664 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2665 {
2666   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2667   Mat_MatTransMultDense *abt;
2668   MPI_Comm              comm;
2669   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2670   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2671   PetscInt              i,cK=A->cmap->N,k,j,bn;
2672   PetscScalar           _DOne=1.0,_DZero=0.0;
2673   const PetscScalar     *av,*bv;
2674   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2675   MPI_Request           reqs[2];
2676   const PetscInt        *ranges;
2677 
2678   PetscFunctionBegin;
2679   MatCheckProduct(C,3);
2680   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2681   abt  = (Mat_MatTransMultDense*)C->product->data;
2682   CHKERRQ(PetscObjectGetComm((PetscObject)C,&comm));
2683   CHKERRMPI(MPI_Comm_rank(comm,&rank));
2684   CHKERRMPI(MPI_Comm_size(comm,&size));
2685   CHKERRQ(MatDenseGetArrayRead(a->A,&av));
2686   CHKERRQ(MatDenseGetArrayRead(b->A,&bv));
2687   CHKERRQ(MatDenseGetArrayWrite(c->A,&cv));
2688   CHKERRQ(MatDenseGetLDA(a->A,&i));
2689   CHKERRQ(PetscBLASIntCast(i,&alda));
2690   CHKERRQ(MatDenseGetLDA(b->A,&i));
2691   CHKERRQ(PetscBLASIntCast(i,&blda));
2692   CHKERRQ(MatDenseGetLDA(c->A,&i));
2693   CHKERRQ(PetscBLASIntCast(i,&clda));
2694   CHKERRQ(MatGetOwnershipRanges(B,&ranges));
2695   bn   = B->rmap->n;
2696   if (blda == bn) {
2697     sendbuf = (PetscScalar*)bv;
2698   } else {
2699     sendbuf = abt->buf[0];
2700     for (k = 0, i = 0; i < cK; i++) {
2701       for (j = 0; j < bn; j++, k++) {
2702         sendbuf[k] = bv[i * blda + j];
2703       }
2704     }
2705   }
2706   if (size > 1) {
2707     sendto = (rank + size - 1) % size;
2708     recvfrom = (rank + size + 1) % size;
2709   } else {
2710     sendto = recvfrom = 0;
2711   }
2712   CHKERRQ(PetscBLASIntCast(cK,&ck));
2713   CHKERRQ(PetscBLASIntCast(c->A->rmap->n,&cm));
2714   recvisfrom = rank;
2715   for (i = 0; i < size; i++) {
2716     /* we have finished receiving and sending; both buffers can now be read/modified */
2717     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process the next recvbuf will originate from */
2718     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2719 
2720     if (nextrecvisfrom != rank) {
2721       /* start the cyclic transfer: send from sendbuf, receive into recvbuf (which becomes the next sendbuf) */
2722       sendsiz = cK * bn;
2723       recvsiz = cK * nextbn;
2724       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2725       CHKERRMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2726       CHKERRMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2727     }
2728 
2729     /* local aseq * sendbuf^T */
2730     CHKERRQ(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2731     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2732 
2733     if (nextrecvisfrom != rank) {
2734       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2735       CHKERRMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2736     }
2737     bn = nextbn;
2738     recvisfrom = nextrecvisfrom;
2739     sendbuf = recvbuf;
2740   }
2741   CHKERRQ(MatDenseRestoreArrayRead(a->A,&av));
2742   CHKERRQ(MatDenseRestoreArrayRead(b->A,&bv));
2743   CHKERRQ(MatDenseRestoreArrayWrite(c->A,&cv));
2744   CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2745   CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2746   PetscFunctionReturn(0);
2747 }
2748 
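/*
   Minimal MPI-only sketch (kept out of compilation) of the ring schedule used above:
   each rank repeatedly ships its current buffer to rank-1 and receives the next block
   from rank+1, so after size-1 shifts every rank has seen every row block of B exactly
   once, with the local GEMM overlapping the transfer. The integer "token" stands in
   for the packed B block.
*/
#if 0
#include <mpi.h>
int main(int argc,char **argv)
{
  int         rank,size,i,token;
  MPI_Request reqs[2];

  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&rank);
  MPI_Comm_size(MPI_COMM_WORLD,&size);
  token = rank;
  for (i = 0; i < size - 1; i++) {
    int next,sendto = (rank + size - 1) % size,recvfrom = (rank + 1) % size;
    MPI_Isend(&token,1,MPI_INT,sendto,0,MPI_COMM_WORLD,&reqs[0]);
    MPI_Irecv(&next,1,MPI_INT,recvfrom,0,MPI_COMM_WORLD,&reqs[1]);
    /* ... the local GEMM against the block in "token" would overlap here ... */
    MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);
    token = next;
  }
  MPI_Finalize();
  return 0;
}
#endif
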
2749 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2750 {
2751   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2752   Mat_MatTransMultDense *abt;
2753   MPI_Comm              comm;
2754   PetscMPIInt           size;
2755   PetscScalar           *cv, *sendbuf, *recvbuf;
2756   const PetscScalar     *av,*bv;
2757   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2758   PetscScalar           _DOne=1.0,_DZero=0.0;
2759   PetscBLASInt          cm, cn, ck, alda, clda;
2760 
2761   PetscFunctionBegin;
2762   MatCheckProduct(C,3);
2763   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2764   abt  = (Mat_MatTransMultDense*)C->product->data;
2765   CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
2766   CHKERRMPI(MPI_Comm_size(comm,&size));
2767   CHKERRQ(MatDenseGetArrayRead(a->A,&av));
2768   CHKERRQ(MatDenseGetArrayRead(b->A,&bv));
2769   CHKERRQ(MatDenseGetArrayWrite(c->A,&cv));
2770   CHKERRQ(MatDenseGetLDA(a->A,&i));
2771   CHKERRQ(PetscBLASIntCast(i,&alda));
2772   CHKERRQ(MatDenseGetLDA(b->A,&blda));
2773   CHKERRQ(MatDenseGetLDA(c->A,&i));
2774   CHKERRQ(PetscBLASIntCast(i,&clda));
2775   /* copy transpose of B into buf[0] */
2776   bn      = B->rmap->n;
2777   sendbuf = abt->buf[0];
2778   recvbuf = abt->buf[1];
2779   for (k = 0, j = 0; j < bn; j++) {
2780     for (i = 0; i < cK; i++, k++) {
2781       sendbuf[k] = bv[i * blda + j];
2782     }
2783   }
2784   CHKERRQ(MatDenseRestoreArrayRead(b->A,&bv));
2785   CHKERRMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2786   CHKERRQ(PetscBLASIntCast(cK,&ck));
2787   CHKERRQ(PetscBLASIntCast(c->A->rmap->n,&cm));
2788   CHKERRQ(PetscBLASIntCast(c->A->cmap->n,&cn));
2789   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2790   CHKERRQ(MatDenseRestoreArrayRead(a->A,&av));
2792   CHKERRQ(MatDenseRestoreArrayWrite(c->A,&cv));
2793   CHKERRQ(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2794   CHKERRQ(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2795   PetscFunctionReturn(0);
2796 }
2797 
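/*
   Sketch (kept out of compilation) of the packing step above, with double standing in
   for PetscScalar: the local rows of B are written transposed into sendbuf, so after
   MPI_Allgatherv the receive buffer holds B^T contiguously with leading dimension cK,
   ready for the "N","N" GEMM.
*/
#if 0
static void PackTranspose(const double *bv,double *sendbuf,int bn,int cK,int blda)
{
  int i,j,k = 0;
  for (j = 0; j < bn; j++) {   /* local rows of B ...           */
    for (i = 0; i < cK; i++) { /* ... become columns of sendbuf */
      sendbuf[k++] = bv[i*blda + j];
    }
  }
}
#endif
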
2798 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2799 {
2800   Mat_MatTransMultDense *abt;
2801 
2802   PetscFunctionBegin;
2803   MatCheckProduct(C,3);
2804   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2805   abt = (Mat_MatTransMultDense*)C->product->data;
2806   switch (abt->alg) {
2807   case 1:
2808     CHKERRQ(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2809     break;
2810   default:
2811     CHKERRQ(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2812     break;
2813   }
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2818 {
2819   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2820 
2821   PetscFunctionBegin;
2822   CHKERRQ(MatDestroy(&ab->Ce));
2823   CHKERRQ(MatDestroy(&ab->Ae));
2824   CHKERRQ(MatDestroy(&ab->Be));
2825   CHKERRQ(PetscFree(ab));
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 #if defined(PETSC_HAVE_ELEMENTAL)
2830 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2831 {
2832   Mat_MatMultDense *ab;
2833 
2834   PetscFunctionBegin;
2835   MatCheckProduct(C,3);
2836   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2837   ab   = (Mat_MatMultDense*)C->product->data;
2838   CHKERRQ(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2839   CHKERRQ(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2840   CHKERRQ(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2841   CHKERRQ(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2842   PetscFunctionReturn(0);
2843 }
2844 
2845 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2846 {
2847   Mat              Ae,Be,Ce;
2848   Mat_MatMultDense *ab;
2849 
2850   PetscFunctionBegin;
2851   MatCheckProduct(C,4);
2852   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2853   /* check local size of A and B */
2854   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2855     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2856   }
2857 
2858   /* create elemental matrices Ae and Be */
2859   CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2860   CHKERRQ(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2861   CHKERRQ(MatSetType(Ae,MATELEMENTAL));
2862   CHKERRQ(MatSetUp(Ae));
2863   CHKERRQ(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2864 
2865   CHKERRQ(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2866   CHKERRQ(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2867   CHKERRQ(MatSetType(Be,MATELEMENTAL));
2868   CHKERRQ(MatSetUp(Be));
2869   CHKERRQ(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2870 
2871   /* compute symbolic Ce = Ae*Be */
2872   CHKERRQ(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2873   CHKERRQ(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2874 
2875   /* setup C */
2876   CHKERRQ(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2877   CHKERRQ(MatSetType(C,MATDENSE));
2878   CHKERRQ(MatSetUp(C));
2879 
2880   /* create data structure for reusing the dense C */
2881   CHKERRQ(PetscNew(&ab));
2882   ab->Ae = Ae;
2883   ab->Be = Be;
2884   ab->Ce = Ce;
2885 
2886   C->product->data    = ab;
2887   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2888   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2889   PetscFunctionReturn(0);
2890 }
2891 #endif
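
/*
   Sketch (kept out of compilation) of the C = A*B path above, which is compiled only
   when PETSc is configured with Elemental (e.g. --download-elemental): both operands
   are converted to MATELEMENTAL, the product is formed there, and the result is
   converted back into C. The matrix sizes are assumptions made for the example.
*/
#if 0
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat            A,B,C;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  CHKERRQ(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,30,40,NULL,&A));
  CHKERRQ(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,40,20,NULL,&B));
  CHKERRQ(MatSetRandom(A,NULL));
  CHKERRQ(MatSetRandom(B,NULL));
  CHKERRQ(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C)); /* goes through Ae, Be, Ce */
  CHKERRQ(MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));   /* reuses ab->Ae, ab->Be, ab->Ce */
  CHKERRQ(MatDestroy(&C));
  CHKERRQ(MatDestroy(&B));
  CHKERRQ(MatDestroy(&A));
  ierr = PetscFinalize();
  return ierr;
}
#endif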
2892 /* ----------------------------------------------- */
2893 #if defined(PETSC_HAVE_ELEMENTAL)
2894 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2895 {
2896   PetscFunctionBegin;
2897   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2898   C->ops->productsymbolic = MatProductSymbolic_AB;
2899   PetscFunctionReturn(0);
2900 }
2901 #endif
2902 
2903 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2904 {
2905   Mat_Product *product = C->product;
2906   Mat         A = product->A,B=product->B;
2907 
2908   PetscFunctionBegin;
2909   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2910     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2911   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2912   C->ops->productsymbolic = MatProductSymbolic_AtB;
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2917 {
2918   PetscErrorCode ierr;
2919   Mat_Product    *product = C->product;
2920   const char     *algTypes[2] = {"allgatherv","cyclic"};
2921   PetscInt       alg,nalg = 2;
2922   PetscBool      flg = PETSC_FALSE;
2923 
2924   PetscFunctionBegin;
2925   /* Set default algorithm */
2926   alg = 0; /* default is allgatherv */
2927   CHKERRQ(PetscStrcmp(product->alg,"default",&flg));
2928   if (flg) {
2929     CHKERRQ(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2930   }
2931 
2932   /* Get runtime option */
2933   if (product->api_user) {
2934     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
2935     CHKERRQ(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2936     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2937   } else {
2938     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
2939     CHKERRQ(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2940     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2941   }
2942   if (flg) {
2943     CHKERRQ(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2944   }
2945 
2946   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2947   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2948   PetscFunctionReturn(0);
2949 }
2950 
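/*
   Sketch (kept out of compilation): the two option paths above can also be driven
   from the options database before the product is configured; the call below is
   equivalent to passing -mat_product_algorithm cyclic on the command line (or
   -matmattransmult_mpidense_mpidense_via cyclic for MatMatTransposeMult() callers).
*/
#if 0
#include <petscsys.h>
int main(int argc,char **argv)
{
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
  CHKERRQ(PetscOptionsSetValue(NULL,"-mat_product_algorithm","cyclic"));
  /* ... create A, B and the ABt product; MatProductSetFromOptions() picks "cyclic" ... */
  ierr = PetscFinalize();
  return ierr;
}
#endif
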
2951 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2952 {
2953   Mat_Product    *product = C->product;
2954 
2955   PetscFunctionBegin;
2956   switch (product->type) {
2957 #if defined(PETSC_HAVE_ELEMENTAL)
2958   case MATPRODUCT_AB:
2959     CHKERRQ(MatProductSetFromOptions_MPIDense_AB(C));
2960     break;
2961 #endif
2962   case MATPRODUCT_AtB:
2963     CHKERRQ(MatProductSetFromOptions_MPIDense_AtB(C));
2964     break;
2965   case MATPRODUCT_ABt:
2966     CHKERRQ(MatProductSetFromOptions_MPIDense_ABt(C));
2967     break;
2968   default:
2969     break;
2970   }
2971   PetscFunctionReturn(0);
2972 }
2973