xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
56   PetscScalar    *v;
57 
58   PetscFunctionBegin;
59   PetscCall(MatDenseGetArray(mat->A,&v));
60   PetscCall(MatDenseGetLDA(mat->A,&lda));
61   rend2 = PetscMin(rend,A->cmap->N);
62   if (rend2>rstart) {
63     for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
64     PetscCall(PetscLogFlops(rend2-rstart));
65   }
66   PetscCall(MatDenseRestoreArray(mat->A,&v));
67   PetscFunctionReturn(0);
68 }
69 
70 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
71 {
72   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
73   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
74 
75   PetscFunctionBegin;
76   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
77   lrow = row - rstart;
78   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
83 {
84   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
85   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
86 
87   PetscFunctionBegin;
88   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
89   lrow = row - rstart;
90   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
91   PetscFunctionReturn(0);
92 }
93 
/* Return the "diagonal block" of the parallel dense matrix: the square sequential
   dense matrix formed by the locally owned rows restricted to the matching global
   columns. Created on first use and cached on A under the key "DiagonalBlock". */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  /* rows and columns must be identically partitioned for a square diagonal block */
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    /* B aliases the local storage starting at column rstart;
       NOTE(review): the offset m*rstart assumes the local leading dimension equals m — confirm */
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    /* composing on A takes a reference; drop ours so the cache owns the block */
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
124 
/* Insert/add a logically dense block of values. Locally owned rows go straight into
   the sequential block; off-process rows are queued in the stash for communication
   at assembly time (unless stashing is disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative columns are silently skipped */
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          /* v is column-major: entry (i,j) sits at v[i + j*m] */
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
157 
158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
159 {
160   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
161   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
162 
163   PetscFunctionBegin;
164   for (i=0; i<m; i++) {
165     if (idxm[i] < 0) continue; /* negative row */
166     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
167     if (idxm[i] >= rstart && idxm[i] < rend) {
168       row = idxm[i] - rstart;
169       for (j=0; j<n; j++) {
170         if (idxn[j] < 0) continue; /* negative column */
171         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
172         PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
173       }
174     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
180 {
181   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
182 
183   PetscFunctionBegin;
184   PetscCall(MatDenseGetLDA(a->A,lda));
185   PetscFunctionReturn(0);
186 }
187 
/* Set the leading dimension of the local dense storage. If the inner sequential
   matrix has not been created yet, create it here first (matching the CUDA
   sequential type when A is MATMPIDENSECUDA) so the LDA can be recorded before
   the storage is allocated. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    /* the local block holds the locally owned rows and ALL global columns */
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}
207 
208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211 
212   PetscFunctionBegin;
213   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
214   PetscCall(MatDenseGetArray(a->A,array));
215   PetscFunctionReturn(0);
216 }
217 
218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
219 {
220   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
221 
222   PetscFunctionBegin;
223   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
224   PetscCall(MatDenseGetArrayRead(a->A,array));
225   PetscFunctionReturn(0);
226 }
227 
228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
229 {
230   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
231 
232   PetscFunctionBegin;
233   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
234   PetscCall(MatDenseGetArrayWrite(a->A,array));
235   PetscFunctionReturn(0);
236 }
237 
238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
244   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
245   PetscCall(MatDensePlaceArray(a->A,array));
246   PetscFunctionReturn(0);
247 }
248 
249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
250 {
251   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
252 
253   PetscFunctionBegin;
254   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
255   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
256   PetscCall(MatDenseResetArray(a->A));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
261 {
262   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
263 
264   PetscFunctionBegin;
265   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
266   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
267   PetscCall(MatDenseReplaceArray(a->A,array));
268   PetscFunctionReturn(0);
269 }
270 
/* Extract the submatrix A(isrow,iscol) into *B. Only supports the case where the
   submatrix keeps the same row partitioning as A (no redistribution); the column
   index set is gathered onto every process so each can copy its own rows. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the complete list of selected columns */
  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  /* bv walks the destination contiguously column by column;
     NOTE(review): assumes the new block's leading dimension equals nrows — confirm */
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* selected rows, shifted to local indexing */
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343 
344   PetscFunctionBegin;
345   PetscCall(MatDenseRestoreArray(a->A,array));
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352 
353   PetscFunctionBegin;
354   PetscCall(MatDenseRestoreArrayRead(a->A,array));
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
359 {
360   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
364   PetscFunctionReturn(0);
365 }
366 
367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
368 {
369   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
370   PetscInt       nstash,reallocs;
371 
372   PetscFunctionBegin;
373   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
374 
375   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
376   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
377   PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
378   PetscFunctionReturn(0);
379 }
380 
/* Finish assembly: receive the stashed off-process entries, insert them locally
   in per-row batches, then assemble the inner sequential block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* assemble the local block so its flags (and any device state) are updated */
  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}
414 
415 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
416 {
417   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
418 
419   PetscFunctionBegin;
420   PetscCall(MatZeroEntries(l->A));
421   PetscFunctionReturn(0);
422 }
423 
424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
425 {
426   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
427   PetscInt          i,len,*lrows;
428 
429   PetscFunctionBegin;
430   /* get locally owned rows */
431   PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
432   /* fix right hand side if needed */
433   if (x && b) {
434     const PetscScalar *xx;
435     PetscScalar       *bb;
436 
437     PetscCall(VecGetArrayRead(x, &xx));
438     PetscCall(VecGetArrayWrite(b, &bb));
439     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
440     PetscCall(VecRestoreArrayRead(x, &xx));
441     PetscCall(VecRestoreArrayWrite(b, &bb));
442   }
443   PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
444   if (diag != 0.0) {
445     Vec d;
446 
447     PetscCall(MatCreateVecs(A,NULL,&d));
448     PetscCall(VecSet(d,diag));
449     PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
450     PetscCall(VecDestroy(&d));
451   }
452   PetscCall(PetscFree(lrows));
453   PetscFunctionReturn(0);
454 }
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
/* y = A*x. The needed entries of x are broadcast into the local work vector lvec
   via the Mvctx star forest (the local block stores full rows, so remote entries
   of x are required), then the sequential mult kernel is applied. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the communication pattern is built lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  /* memtype-aware SF call handles host or device arrays without explicit copies */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}
479 
/* z = y + A*x. Same communication as MatMult_MPIDense: broadcast x into lvec
   through the Mvctx star forest, then apply the sequential multadd kernel. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the communication pattern is built lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  /* memtype-aware SF call handles host or device arrays without explicit copies */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}
498 
/* y = A^T * x. Apply the local transpose kernel into lvec, then sum the partial
   contributions from all processes into y with an SF reduction; y is zeroed
   first because the reduction adds into it. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the communication pattern is built lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  /* memtype-aware SF call handles host or device arrays without explicit copies */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}
518 
/* z = y + A^T * x. Seed z with y, apply the local transpose kernel into lvec,
   then reduce-add the partial contributions from all processes into z. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the communication pattern is built lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  /* memtype-aware SF call handles host or device arrays without explicit copies */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
542   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
543   PetscScalar       *x,zero = 0.0;
544   const PetscScalar *av;
545 
546   PetscFunctionBegin;
547   PetscCall(VecSet(v,zero));
548   PetscCall(VecGetArray(v,&x));
549   PetscCall(VecGetSize(v,&n));
550   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
551   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
552   radd = A->rmap->rstart*m;
553   PetscCall(MatDenseGetArrayRead(a->A,&av));
554   PetscCall(MatDenseGetLDA(a->A,&lda));
555   for (i=0; i<len; i++) {
556     x[i] = av[radd + i*lda + i];
557   }
558   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
559   PetscCall(VecRestoreArray(v,&x));
560   PetscFunctionReturn(0);
561 }
562 
/* Destroy the parallel dense matrix: free the stash, the inner sequential block,
   the communication objects and cached column/sub-matrix helpers, then remove
   every composed method so the object can be retyped cleanly. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* refuse to destroy while a column-vector or sub-matrix view is outstanding */
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  /* remove every method composed on the object for this type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined (PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAReplaceArray_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));

  /* drop the cached diagonal block, if any (set by MatGetDiagonalBlock_MPIDense) */
  PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL));
  PetscFunctionReturn(0);
}
638 
639 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
640 
641 #include <petscdraw.h>
642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
643 {
644   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
645   PetscMPIInt       rank;
646   PetscViewerType   vtype;
647   PetscBool         iascii,isdraw;
648   PetscViewer       sviewer;
649   PetscViewerFormat format;
650 
651   PetscFunctionBegin;
652   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
653   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
654   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
655   if (iascii) {
656     PetscCall(PetscViewerGetType(viewer,&vtype));
657     PetscCall(PetscViewerGetFormat(viewer,&format));
658     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
659       MatInfo info;
660       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
661       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
662       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
663       PetscCall(PetscViewerFlush(viewer));
664       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
665       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
666       PetscFunctionReturn(0);
667     } else if (format == PETSC_VIEWER_ASCII_INFO) {
668       PetscFunctionReturn(0);
669     }
670   } else if (isdraw) {
671     PetscDraw draw;
672     PetscBool isnull;
673 
674     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
675     PetscCall(PetscDrawIsNull(draw,&isnull));
676     if (isnull) PetscFunctionReturn(0);
677   }
678 
679   {
680     /* assemble the entire matrix onto first processor. */
681     Mat         A;
682     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
683     PetscInt    *cols;
684     PetscScalar *vals;
685 
686     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
687     if (rank == 0) {
688       PetscCall(MatSetSizes(A,M,N,M,N));
689     } else {
690       PetscCall(MatSetSizes(A,0,0,M,N));
691     }
692     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
693     PetscCall(MatSetType(A,MATMPIDENSE));
694     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
695     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
696 
697     /* Copy the matrix ... This isn't the most efficient means,
698        but it's quick for now */
699     A->insertmode = INSERT_VALUES;
700 
701     row = mat->rmap->rstart;
702     m   = mdn->A->rmap->n;
703     for (i=0; i<m; i++) {
704       PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
705       PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
706       PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
707       row++;
708     }
709 
710     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
711     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
712     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
713     if (rank == 0) {
714       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
715       PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
716     }
717     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
718     PetscCall(PetscViewerFlush(viewer));
719     PetscCall(MatDestroy(&A));
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
725 {
726   PetscBool      iascii,isbinary,isdraw,issocket;
727 
728   PetscFunctionBegin;
729   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
730   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
731   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
732   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
733 
734   if (iascii || issocket || isdraw) {
735     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
736   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat,viewer));
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
741 {
742   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
743   Mat            mdn  = mat->A;
744   PetscLogDouble isend[5],irecv[5];
745 
746   PetscFunctionBegin;
747   info->block_size = 1.0;
748 
749   PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));
750 
751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
752   isend[3] = info->memory;  isend[4] = info->mallocs;
753   if (flag == MAT_LOCAL) {
754     info->nz_used      = isend[0];
755     info->nz_allocated = isend[1];
756     info->nz_unneeded  = isend[2];
757     info->memory       = isend[3];
758     info->mallocs      = isend[4];
759   } else if (flag == MAT_GLOBAL_MAX) {
760     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
761 
762     info->nz_used      = irecv[0];
763     info->nz_allocated = irecv[1];
764     info->nz_unneeded  = irecv[2];
765     info->memory       = irecv[3];
766     info->mallocs      = irecv[4];
767   } else if (flag == MAT_GLOBAL_SUM) {
768     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
769 
770     info->nz_used      = irecv[0];
771     info->nz_allocated = irecv[1];
772     info->nz_unneeded  = irecv[2];
773     info->memory       = irecv[3];
774     info->mallocs      = irecv[4];
775   }
776   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
777   info->fill_ratio_needed = 0;
778   info->factor_mallocs    = 0;
779   PetscFunctionReturn(0);
780 }
781 
/* Set an option on a parallel dense matrix: options affecting the local storage are
   forwarded to the local sequential matrix, stash behavior is recorded locally, and
   options that have no meaning for a dense matrix are logged and ignored. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* These concern the nonzero pattern of the local storage; forward to the local matrix */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  /* Row orientation is tracked on both the parallel wrapper and the local matrix */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  /* Meaningless for dense storage (every entry already exists); note and ignore */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  /* Skip stashing of off-process entries during assembly */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
825 
/* Compute A = diag(ll) * A * diag(rr) in place. The left vector ll is distributed like
   the rows of A and scales rows directly; the right vector rr is distributed like the
   columns and must first be broadcast into the local work vector lvec via the SF. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    /* Scale row i by l[i]: the column-major storage means consecutive entries of a
       row are lda apart, hence the stride-lda walk */
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    /* Set up the SF lazily, then gather all N column scale factors onto every rank */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    /* Scale column i by r[i]; a column is contiguous starting at offset i*lda */
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}
871 
/* Compute a matrix norm of a parallel dense matrix. On one process this delegates
   entirely to the sequential implementation; in parallel, Frobenius and 1-norms are
   assembled from local contributions via Allreduce, and the infinity norm takes the
   maximum of the per-process row norms. The 2-norm is not supported. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  /* NOTE(review): the NORM_FROBENIUS and NORM_1 loops below walk the local array as if
     it were contiguous, i.e. they assume the leading dimension equals the local row
     count -- confirm this holds for matrices created with a user-supplied lda */
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v    = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* Sum |a_ij|^2 over the local block, reduce, then take the square root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* Per-column absolute sums: accumulate local partial sums for all N global
         columns (each rank owns full columns locally), reduce, then take the max */
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* Rows are not split across processes, so the local row norm is exact */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}
919 
/* Transpose a parallel dense matrix. Each process scatters its local block into the
   result with MatSetValues using global indices, letting assembly handle the data
   movement. For MAT_INPLACE_MATRIX the freshly built transpose replaces A via
   MatHeaderMerge; otherwise the new matrix is returned in *matout. */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* Create the result with swapped local and global dimensions */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  /* rwork holds the global row indices of this process's rows; each local column j
     (one contiguous lda-strided slab of v) becomes global row j of B */
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* In-place: B's contents take over A's header, destroying A's old data */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
957 
958 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
959 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
960 
961 PetscErrorCode MatSetUp_MPIDense(Mat A)
962 {
963   PetscFunctionBegin;
964   PetscCall(PetscLayoutSetUp(A->rmap));
965   PetscCall(PetscLayoutSetUp(A->cmap));
966   if (!A->preallocated) {
967     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
973 {
974   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
975 
976   PetscFunctionBegin;
977   PetscCall(MatAXPY(A->A,alpha,B->A,str));
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatConjugate_MPIDense(Mat mat)
982 {
983   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
984 
985   PetscFunctionBegin;
986   PetscCall(MatConjugate(a->A));
987   PetscFunctionReturn(0);
988 }
989 
990 PetscErrorCode MatRealPart_MPIDense(Mat A)
991 {
992   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
993 
994   PetscFunctionBegin;
995   PetscCall(MatRealPart(a->A));
996   PetscFunctionReturn(0);
997 }
998 
999 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1000 {
1001   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1002 
1003   PetscFunctionBegin;
1004   PetscCall(MatImaginaryPart(a->A));
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1009 {
1010   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1011 
1012   PetscFunctionBegin;
1013   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1014   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1015   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1020 
1021 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
1022 {
1023   PetscInt       i,m,n;
1024   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1025   PetscReal      *work;
1026 
1027   PetscFunctionBegin;
1028   PetscCall(MatGetSize(A,&m,&n));
1029   PetscCall(PetscMalloc1(n,&work));
1030   if (type == REDUCTION_MEAN_REALPART) {
1031     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
1032   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1033     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
1034   } else {
1035     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
1036   }
1037   if (type == NORM_2) {
1038     for (i=0; i<n; i++) work[i] *= work[i];
1039   }
1040   if (type == NORM_INFINITY) {
1041     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1042   } else {
1043     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1044   }
1045   PetscCall(PetscFree(work));
1046   if (type == NORM_2) {
1047     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1048   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1049     for (i=0; i<n; i++) reductions[i] /= m;
1050   }
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #if defined(PETSC_HAVE_CUDA)
1055 PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
1056 {
1057   PetscScalar *da;
1058   PetscInt    lda;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(MatDenseCUDAGetArray(A,&da));
1062   PetscCall(MatDenseGetLDA(A,&lda));
1063   PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
1064   PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
1065   PetscCall(MatDenseCUDARestoreArray(A,&da));
1066   PetscFunctionReturn(0);
1067 }
1068 
/* Expose column col of A as a CUDA vector without copying: the cached vector cvec is
   pointed at the column's device storage. Only one column may be outstanding; the
   caller must pair this with MatDenseRestoreColumnVec(). */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* Lazily create the cached column vector; it borrows its array, hence NULL here */
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  /* Record col+1 so 0 can mean "no column outstanding" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  /* Column col starts at offset col*lda in the column-major device array */
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1088 
1089 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1090 {
1091   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1095   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1096   a->vecinuse = 0;
1097   PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1098   PetscCall(VecCUDAResetArray(a->cvec));
1099   *v   = NULL;
1100   PetscFunctionReturn(0);
1101 }
1102 
/* Read-only variant of MatDenseGetColumnVec: expose column col through the cached
   cvec, pushed into a read-locked state so callers cannot modify it. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* Lazily create the cached column vector; it borrows its array, hence NULL here */
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  /* Record col+1 so 0 can mean "no column outstanding" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  /* Lock the vector so write access through it is a usage error */
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1123 
1124 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1125 {
1126   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1127 
1128   PetscFunctionBegin;
1129   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1130   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1131   a->vecinuse = 0;
1132   PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
1133   PetscCall(VecLockReadPop(a->cvec));
1134   PetscCall(VecCUDAResetArray(a->cvec));
1135   *v   = NULL;
1136   PetscFunctionReturn(0);
1137 }
1138 
/* Write-only variant of MatDenseGetColumnVec: expose column col through the cached
   cvec using the write-access device array (prior contents need not be valid). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* Lazily create the cached column vector; it borrows its array, hence NULL here */
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  /* Record col+1 so 0 can mean "no column outstanding" */
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1158 
1159 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1160 {
1161   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1162 
1163   PetscFunctionBegin;
1164   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1165   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1166   a->vecinuse = 0;
1167   PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1168   PetscCall(VecCUDAResetArray(a->cvec));
1169   *v   = NULL;
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1174 {
1175   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1176 
1177   PetscFunctionBegin;
1178   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1179   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1180   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1185 {
1186   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1187 
1188   PetscFunctionBegin;
1189   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1190   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1191   PetscCall(MatDenseCUDAResetArray(l->A));
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1196 {
1197   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1198 
1199   PetscFunctionBegin;
1200   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1201   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1202   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1207 {
1208   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1209 
1210   PetscFunctionBegin;
1211   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1216 {
1217   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1218 
1219   PetscFunctionBegin;
1220   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1225 {
1226   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1227 
1228   PetscFunctionBegin;
1229   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1234 {
1235   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1236 
1237   PetscFunctionBegin;
1238   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1243 {
1244   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(MatDenseCUDAGetArray(l->A,a));
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1252 {
1253   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1254 
1255   PetscFunctionBegin;
1256   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1264 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1265 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1266 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1267 
/* Bind (or unbind) a CUDA parallel dense matrix to the CPU. The local matrix follows
   the binding, and the column-vector accessors plus the shift operation are rewired
   to the CUDA implementations when unbound and the host implementations when bound.
   Cached helper objects of the wrong memory type are destroyed so they are recreated
   with the correct type on next use. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A,bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* Drop cached column vector/submatrix unless they are already CUDA objects */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    /* Install the device-aware column-vector accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift                   = MatShift_MPIDenseCUDA;
  } else {
    /* Install the host column-vector accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift                   = MatShift_MPIDense;
  }
  /* Propagate the binding to a surviving cached submatrix */
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat,bind));
  PetscFunctionReturn(0);
}
1307 
/* Preallocate the local device storage of a MATMPIDENSECUDA matrix. If d_data is
   non-NULL the local matrix uses that device array instead of allocating its own.
   Silently a no-op when A is not of type MATMPIDENSECUDA. Marks the matrix
   preallocated and assembled on success. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  /* Create the local matrix on first use: local rows x ALL global columns */
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1330 #endif
1331 
1332 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1333 {
1334   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1335 
1336   PetscFunctionBegin;
1337   PetscCall(MatSetRandom(d->A,rctx));
1338   PetscFunctionReturn(0);
1339 }
1340 
/* A dense matrix stores every entry explicitly, so no diagonal entry can ever be
   missing; *missing is always PETSC_FALSE and d is never written. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1347 
1348 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1349 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1350 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1351 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1352 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1353 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1354 
1355 /* -------------------------------------------------------------------*/
1356 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1357                                         MatGetRow_MPIDense,
1358                                         MatRestoreRow_MPIDense,
1359                                         MatMult_MPIDense,
1360                                 /*  4*/ MatMultAdd_MPIDense,
1361                                         MatMultTranspose_MPIDense,
1362                                         MatMultTransposeAdd_MPIDense,
1363                                         NULL,
1364                                         NULL,
1365                                         NULL,
1366                                 /* 10*/ NULL,
1367                                         NULL,
1368                                         NULL,
1369                                         NULL,
1370                                         MatTranspose_MPIDense,
1371                                 /* 15*/ MatGetInfo_MPIDense,
1372                                         MatEqual_MPIDense,
1373                                         MatGetDiagonal_MPIDense,
1374                                         MatDiagonalScale_MPIDense,
1375                                         MatNorm_MPIDense,
1376                                 /* 20*/ MatAssemblyBegin_MPIDense,
1377                                         MatAssemblyEnd_MPIDense,
1378                                         MatSetOption_MPIDense,
1379                                         MatZeroEntries_MPIDense,
1380                                 /* 24*/ MatZeroRows_MPIDense,
1381                                         NULL,
1382                                         NULL,
1383                                         NULL,
1384                                         NULL,
1385                                 /* 29*/ MatSetUp_MPIDense,
1386                                         NULL,
1387                                         NULL,
1388                                         MatGetDiagonalBlock_MPIDense,
1389                                         NULL,
1390                                 /* 34*/ MatDuplicate_MPIDense,
1391                                         NULL,
1392                                         NULL,
1393                                         NULL,
1394                                         NULL,
1395                                 /* 39*/ MatAXPY_MPIDense,
1396                                         MatCreateSubMatrices_MPIDense,
1397                                         NULL,
1398                                         MatGetValues_MPIDense,
1399                                         MatCopy_MPIDense,
1400                                 /* 44*/ NULL,
1401                                         MatScale_MPIDense,
1402                                         MatShift_MPIDense,
1403                                         NULL,
1404                                         NULL,
1405                                 /* 49*/ MatSetRandom_MPIDense,
1406                                         NULL,
1407                                         NULL,
1408                                         NULL,
1409                                         NULL,
1410                                 /* 54*/ NULL,
1411                                         NULL,
1412                                         NULL,
1413                                         NULL,
1414                                         NULL,
1415                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1416                                         MatDestroy_MPIDense,
1417                                         MatView_MPIDense,
1418                                         NULL,
1419                                         NULL,
1420                                 /* 64*/ NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /* 69*/ NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /* 74*/ NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                 /* 79*/ NULL,
1436                                         NULL,
1437                                         NULL,
1438                                         NULL,
1439                                 /* 83*/ MatLoad_MPIDense,
1440                                         NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                 /* 89*/ NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                         NULL,
1450                                 /* 94*/ NULL,
1451                                         NULL,
1452                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1453                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1454                                         NULL,
1455                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1456                                         NULL,
1457                                         NULL,
1458                                         MatConjugate_MPIDense,
1459                                         NULL,
1460                                 /*104*/ NULL,
1461                                         MatRealPart_MPIDense,
1462                                         MatImaginaryPart_MPIDense,
1463                                         NULL,
1464                                         NULL,
1465                                 /*109*/ NULL,
1466                                         NULL,
1467                                         NULL,
1468                                         MatGetColumnVector_MPIDense,
1469                                         MatMissingDiagonal_MPIDense,
1470                                 /*114*/ NULL,
1471                                         NULL,
1472                                         NULL,
1473                                         NULL,
1474                                         NULL,
1475                                 /*119*/ NULL,
1476                                         NULL,
1477                                         NULL,
1478                                         NULL,
1479                                         NULL,
1480                                 /*124*/ NULL,
1481                                         MatGetColumnReductions_MPIDense,
1482                                         NULL,
1483                                         NULL,
1484                                         NULL,
1485                                 /*129*/ NULL,
1486                                         NULL,
1487                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1488                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1489                                         NULL,
1490                                 /*134*/ NULL,
1491                                         NULL,
1492                                         NULL,
1493                                         NULL,
1494                                         NULL,
1495                                 /*139*/ NULL,
1496                                         NULL,
1497                                         NULL,
1498                                         NULL,
1499                                         NULL,
1500                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1501                                 /*145*/ NULL,
1502                                         NULL,
1503                                         NULL,
1504                                         NULL,
1505                                         NULL,
1506                                 /*150*/ NULL
1507 };
1508 
1509 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1510 {
1511   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1512   PetscBool      iscuda;
1513 
1514   PetscFunctionBegin;
1515   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1516   PetscCall(PetscLayoutSetUp(mat->rmap));
1517   PetscCall(PetscLayoutSetUp(mat->cmap));
1518   if (!a->A) {
1519     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1520     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1521     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1522   }
1523   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1524   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1525   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1526   mat->preallocated = PETSC_TRUE;
1527   mat->assembled = PETSC_TRUE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1532 {
1533   Mat            B,C;
1534 
1535   PetscFunctionBegin;
1536   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1537   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1538   PetscCall(MatDestroy(&C));
1539   if (reuse == MAT_REUSE_MATRIX) {
1540     C = *newmat;
1541   } else C = NULL;
1542   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1543   PetscCall(MatDestroy(&B));
1544   if (reuse == MAT_INPLACE_MATRIX) {
1545     PetscCall(MatHeaderReplace(A,&C));
1546   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1551 {
1552   Mat            B,C;
1553 
1554   PetscFunctionBegin;
1555   PetscCall(MatDenseGetLocalMatrix(A,&C));
1556   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1557   if (reuse == MAT_REUSE_MATRIX) {
1558     C = *newmat;
1559   } else C = NULL;
1560   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1561   PetscCall(MatDestroy(&B));
1562   if (reuse == MAT_INPLACE_MATRIX) {
1563     PetscCall(MatHeaderReplace(A,&C));
1564   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental's 2d block-cyclic layout.
   Entries are transferred with MatSetValues()/ADD_VALUES, so for
   MAT_REUSE_MATRIX the target must be zeroed first. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat)); /* ADD_VALUES below must start from zero */
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the local dense array is column-major, so insert column-oriented */
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  /* index sets: all global columns, and this rank's contiguous row range */
  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1606 #endif
1607 
1608 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1609 {
1610   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1611 
1612   PetscFunctionBegin;
1613   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1618 {
1619   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1620 
1621   PetscFunctionBegin;
1622   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1627 {
1628   Mat_MPIDense   *mat;
1629   PetscInt       m,nloc,N;
1630 
1631   PetscFunctionBegin;
1632   PetscCall(MatGetSize(inmat,&m,&N));
1633   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1634   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1635     PetscInt sum;
1636 
1637     if (n == PETSC_DECIDE) {
1638       PetscCall(PetscSplitOwnership(comm,&n,&N));
1639     }
1640     /* Check sum(n) = N */
1641     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1642     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1643 
1644     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1645     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1646   }
1647 
1648   /* numeric phase */
1649   mat = (Mat_MPIDense*)(*outmat)->data;
1650   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix back to a host MATMPIDENSE: strip all
   CUDA-specific composed methods, convert the inner sequential matrix to the
   host type, and mark the data as resident on the CPU. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* for MAT_INPLACE_MATRIX, *newmat already aliases M and is converted in place */

  B    = *newmat;
  /* install the host function table before retargeting the type name */
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  /* remove every CUDA-only composed method registered by the reverse conversion */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  /* convert the inner sequential matrix to the host type too */
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1694 
/* Convert a host MATMPIDENSE matrix to MATMPIDENSECUDA: register the
   CUDA-specific composed methods, convert the inner sequential matrix to the
   device type, and install the device function table. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* for MAT_INPLACE_MATRIX, *newmat already aliases M and is converted in place */

  B    = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  /* register the CUDA-specific methods (undone by the reverse conversion) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  /* convert the inner sequential matrix to the device type */
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1737 #endif
1738 
1739 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1740 {
1741   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1742   PetscInt       lda;
1743 
1744   PetscFunctionBegin;
1745   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1746   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1747   if (!a->cvec) {
1748     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1749   }
1750   a->vecinuse = col + 1;
1751   PetscCall(MatDenseGetLDA(a->A,&lda));
1752   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1753   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1754   *v   = a->cvec;
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1759 {
1760   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1761 
1762   PetscFunctionBegin;
1763   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1764   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1765   a->vecinuse = 0;
1766   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1767   PetscCall(VecResetArray(a->cvec));
1768   *v   = NULL;
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1773 {
1774   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1775   PetscInt       lda;
1776 
1777   PetscFunctionBegin;
1778   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1779   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1780   if (!a->cvec) {
1781     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1782   }
1783   a->vecinuse = col + 1;
1784   PetscCall(MatDenseGetLDA(a->A,&lda));
1785   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1786   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1787   PetscCall(VecLockReadPush(a->cvec));
1788   *v   = a->cvec;
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1793 {
1794   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1795 
1796   PetscFunctionBegin;
1797   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1798   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1799   a->vecinuse = 0;
1800   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1801   PetscCall(VecLockReadPop(a->cvec));
1802   PetscCall(VecResetArray(a->cvec));
1803   *v   = NULL;
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1808 {
1809   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1810   PetscInt       lda;
1811 
1812   PetscFunctionBegin;
1813   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1814   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1815   if (!a->cvec) {
1816     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1817   }
1818   a->vecinuse = col + 1;
1819   PetscCall(MatDenseGetLDA(a->A,&lda));
1820   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1821   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1822   *v   = a->cvec;
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1827 {
1828   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1829 
1830   PetscFunctionBegin;
1831   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1832   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1833   a->vecinuse = 0;
1834   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1835   PetscCall(VecResetArray(a->cvec));
1836   *v   = NULL;
1837   PetscFunctionReturn(0);
1838 }
1839 
/* Expose the dense submatrix with global rows [rbegin,rend) and columns
   [cbegin,cend) as a matrix sharing A's storage. The cached wrapper a->cmat
   is reused when its layout still matches; only one submatrix may be
   outstanding at a time (see a->matinuse). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm     comm;
  PetscInt     pbegin, pend;   /* requested row range clipped to this rank's ownership */

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* intersect [rbegin,rend) with this rank's rows [rstart,rend), in local indices */
  pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart);
  pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart));
  if (!a->cmat) {
    /* first use: create the wrapper matrix and set up its layouts */
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* decide (collectively) whether the cached row layout still matches */
    PetscBool same = (PetscBool)(rend-rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend-pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend-cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  /* hook the local submatrix of a->A into the wrapper's inner matrix */
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* nonzero marks the submatrix as checked out */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1894 
1895 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1896 {
1897   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1898   Mat_MPIDense *c;
1899 
1900   PetscFunctionBegin;
1901   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1902   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1903   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1904   a->matinuse = 0;
1905   c = (Mat_MPIDense*)a->cmat->data;
1906   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1907   *v = NULL;
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 /*MC
1912    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1913 
1914    Options Database Keys:
1915 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1916 
1917   Level: beginner
1918 
1919 .seealso: `MatCreateDense()`
1920 
1921 M*/
/* Type constructor for MATMPIDENSE: allocates the per-matrix data structure,
   installs the function table, and registers all composed methods. The inner
   sequential matrix a->A is created later by MatMPIDenseSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column-vector/submatrix checkout methods */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  /* type conversions, gated on optional external packages */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  /* preallocation and MatProduct dispatch */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}
1986 
1987 /*MC
1988    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1989 
1990    Options Database Keys:
1991 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1992 
1993   Level: beginner
1994 
.seealso: `MATMPIDENSE`, `MATSEQDENSECUDA`, `MATDENSECUDA`
1996 
1997 M*/
1998 #if defined(PETSC_HAVE_CUDA)
1999 #include <petsc/private/deviceimpl.h>
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  /* make sure the CUDA device layer is initialized before any device work */
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  /* build a host MPIDENSE matrix, then convert it in place to the CUDA type */
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
2008 #endif
2009 
2010 /*MC
2011    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2012 
2013    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2014    and MATMPIDENSE otherwise.
2015 
2016    Options Database Keys:
2017 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2018 
2019   Level: beginner
2020 
2021 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
2022 M*/
2023 
2024 /*MC
2025    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2026 
2027    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2028    and MATMPIDENSECUDA otherwise.
2029 
2030    Options Database Keys:
2031 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2032 
2033   Level: beginner
2034 
2035 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
2036 M*/
2037 
2038 /*@C
2039    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2040 
2041    Collective
2042 
2043    Input Parameters:
+  B - the matrix
2045 -  data - optional location of matrix data.  Set data=NULL for PETSc
2046    to control all matrix memory allocation.
2047 
2048    Notes:
2049    The dense format is fully compatible with standard Fortran 77
2050    storage by columns.
2051 
2052    The data input variable is intended primarily for Fortran programmers
2053    who wish to allocate their own matrix memory space.  Most users should
2054    set data=NULL.
2055 
2056    Level: intermediate
2057 
2058 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2059 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation, if any (silently a no-op otherwise) */
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}
2067 
2068 /*@
2069    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2070    array provided by the user. This is useful to avoid copying an array
2071    into a matrix
2072 
2073    Not Collective
2074 
2075    Input Parameters:
2076 +  mat - the matrix
2077 -  array - the array in column major order
2078 
2079    Notes:
2080    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2081    freed when the matrix is destroyed.
2082 
2083    Level: developer
2084 
2085 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2086 
2087 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* errors if the matrix type does not provide this operation */
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* a new host array was installed, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2099 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  /* the visible matrix contents revert to the original array, so bump the state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2124 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2155 
2156 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the user-provided array lives on the device, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2185 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
  /* the visible matrix contents revert to the original array, so bump the state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2210 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array lives on the device, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2240 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
  /* caller obtained write access, so assume the matrix will be modified */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2267 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the GPU buffer was just written, so it holds the authoritative data */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2292 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  /* no PetscObjectStateIncrease(): read-only access does not modify the matrix */
  PetscFunctionReturn(0);
}
2318 
2319 /*@C
2320    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2321 
2322    Not Collective
2323 
2324    Input Parameters:
2325 +  A - the matrix
2326 -  array - the GPU array in column major order
2327 
2328    Notes:
2329    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2330 
2331    Level: developer
2332 
2333 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2334 @*/
2335 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2336 {
2337   PetscFunctionBegin;
2338   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2339   PetscFunctionReturn(0);
2340 }
2341 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
  /* read/write access: assume the caller may modify the matrix */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2368 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the GPU buffer may have been written while checked out; mark it authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2393 #endif
2394 
2395 /*@C
2396    MatCreateDense - Creates a matrix in dense format.
2397 
2398    Collective
2399 
2400    Input Parameters:
2401 +  comm - MPI communicator
2402 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2403 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2404 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2405 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2406 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2407    to control all matrix memory allocation.
2408 
2409    Output Parameter:
2410 .  A - the matrix
2411 
2412    Notes:
2413    The dense format is fully compatible with standard Fortran 77
2414    storage by columns.
2415    Note that, although local portions of the matrix are stored in column-major
2416    order, the matrix is partitioned across MPI ranks by row.
2417 
2418    The data input variable is intended primarily for Fortran programmers
2419    who wish to allocate their own matrix memory space.  Most users should
2420    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2421 
2422    The user MUST specify either the local or global matrix dimensions
2423    (possibly both).
2424 
2425    Level: intermediate
2426 
2427 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2428 @*/
2429 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2430 {
2431   PetscFunctionBegin;
2432   PetscCall(MatCreate(comm,A));
2433   PetscCall(MatSetSizes(*A,m,n,M,N));
2434   PetscCall(MatSetType(*A,MATDENSE));
2435   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2436   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  /* either all ranks pass a data pointer or none do (debug-mode collective check) */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCall(MatSetType(*A,MATDENSECUDA));
  /* only the preallocation matching the actual type (seq on one rank, mpi on many) takes effect */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
2475 #endif
2476 
2477 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2478 {
2479   Mat            mat;
2480   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2481 
2482   PetscFunctionBegin;
2483   *newmat = NULL;
2484   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2485   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2486   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2487   a       = (Mat_MPIDense*)mat->data;
2488 
2489   mat->factortype   = A->factortype;
2490   mat->assembled    = PETSC_TRUE;
2491   mat->preallocated = PETSC_TRUE;
2492 
2493   mat->insertmode = NOT_SET_VALUES;
2494   a->donotstash   = oldmat->donotstash;
2495 
2496   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2497   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2498 
2499   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2500   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2501 
2502   *newmat = mat;
2503   PetscFunctionReturn(0);
2504 }
2505 
/* Load an MPIDENSE matrix from a viewer; only binary and (when available) HDF5
   viewers are supported. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2531 
2532 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2533 {
2534   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2535   Mat            a,b;
2536 
2537   PetscFunctionBegin;
2538   a    = matA->A;
2539   b    = matB->A;
2540   PetscCall(MatEqual(a,b,flag));
2541   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2546 {
2547   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2548 
2549   PetscFunctionBegin;
2550   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2551   PetscCall(MatDestroy(&atb->atb));
2552   PetscCall(PetscFree(atb));
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2557 {
2558   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2559 
2560   PetscFunctionBegin;
2561   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2562   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2563   PetscCall(PetscFree(abt));
2564   PetscFunctionReturn(0);
2565 }
2566 
/* Numeric phase of C = A^T * B for MPIDENSE matrices: each rank forms the full
   (A->cmap->N x B->cmap->N) local product aseq^T * bseq, then an
   MPI_Reduce_scatter sums the contributions and leaves each rank with its
   owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* sendbuf and recvcounts were sized in the symbolic phase */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf: the block of rows destined for each rank is
     packed contiguously (column-major within the block), matching recvcounts */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2611 
/* Symbolic phase of C = A^T * B for MPIDENSE matrices: set up C's sizes/type
   and allocate the reduce-scatter buffers reused by every numeric call.
   The fill argument is part of the symbolic interface but unused here
   (dense products have no fill to estimate). */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  /* A and B must share the same row distribution for the local aseq^T*bseq product */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C: sendbuf holds the full cM x cN local
     product, recvcounts[i] is the number of scalars rank i keeps */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&atb));
  cM   = C->rmap->N;
  PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C,&ranges));
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2650 
/* Symbolic phase of C = A * B^T for MPIDENSE matrices: set up C and allocate
   the buffers for the chosen communication algorithm ("allgatherv" gathers
   all of B^T on every rank; "cyclic" pipelines blocks of B around a ring).
   The fill argument is part of the symbolic interface but unused here. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv" (default), alg 1 = "cyclic" */
  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag so the cyclic pipeline's messages cannot collide with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the full gathered B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2702 
/* Numeric phase of C = A * B^T using a ring pipeline: each rank's block of B
   circulates around the communicator; every step multiplies the local A block
   against the block currently held and writes the corresponding columns of C.
   Communication of the next block overlaps with the current GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  /* leading dimensions of the three local blocks, narrowed to BLAS ints */
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn   = B->rmap->n;
  /* if the local block of B is already contiguous, send it in place; otherwise
     compact it (dropping the LDA padding) into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: blocks travel "downward" (to rank-1), so new data arrives from rank+1 */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank; /* the first block processed is our own */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2787 
2788 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2789 {
2790   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2791   Mat_MatTransMultDense *abt;
2792   MPI_Comm              comm;
2793   PetscMPIInt           size;
2794   PetscScalar           *cv, *sendbuf, *recvbuf;
2795   const PetscScalar     *av,*bv;
2796   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2797   PetscScalar           _DOne=1.0,_DZero=0.0;
2798   PetscBLASInt          cm, cn, ck, alda, clda;
2799 
2800   PetscFunctionBegin;
2801   MatCheckProduct(C,3);
2802   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2803   abt  = (Mat_MatTransMultDense*)C->product->data;
2804   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2805   PetscCallMPI(MPI_Comm_size(comm,&size));
2806   PetscCall(MatDenseGetArrayRead(a->A,&av));
2807   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2808   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2809   PetscCall(MatDenseGetLDA(a->A,&i));
2810   PetscCall(PetscBLASIntCast(i,&alda));
2811   PetscCall(MatDenseGetLDA(b->A,&blda));
2812   PetscCall(MatDenseGetLDA(c->A,&i));
2813   PetscCall(PetscBLASIntCast(i,&clda));
2814   /* copy transpose of B into buf[0] */
2815   bn      = B->rmap->n;
2816   sendbuf = abt->buf[0];
2817   recvbuf = abt->buf[1];
2818   for (k = 0, j = 0; j < bn; j++) {
2819     for (i = 0; i < cK; i++, k++) {
2820       sendbuf[k] = bv[i * blda + j];
2821     }
2822   }
2823   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2824   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2825   PetscCall(PetscBLASIntCast(cK,&ck));
2826   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2827   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2828   if (cm && cn && ck) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2829   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2830   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2831   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2832   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2833   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2838 {
2839   Mat_MatTransMultDense *abt;
2840 
2841   PetscFunctionBegin;
2842   MatCheckProduct(C,3);
2843   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2844   abt = (Mat_MatTransMultDense*)C->product->data;
2845   switch (abt->alg) {
2846   case 1:
2847     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2848     break;
2849   default:
2850     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2851     break;
2852   }
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2857 {
2858   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2859 
2860   PetscFunctionBegin;
2861   PetscCall(MatDestroy(&ab->Ce));
2862   PetscCall(MatDestroy(&ab->Ae));
2863   PetscCall(MatDestroy(&ab->Be));
2864   PetscCall(PetscFree(ab));
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B for MPIDENSE matrices via Elemental: refresh the
   Elemental mirrors of A and B, multiply there, and convert the result back
   into the existing C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* MAT_REUSE_MATRIX: the Elemental mirrors were created in the symbolic phase */
  PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
  PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
  PetscFunctionReturn(0);
}
2883 
/* Symbolic phase of C = A * B for MPIDENSE matrices via Elemental: build
   Elemental mirrors Ae/Be, perform the symbolic Elemental product Ce, set up
   C, and stash the mirrors in the product context for numeric reuse. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be; column-oriented to match dense storage */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(Ae,MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
  PetscCall(MatSetType(Be,MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));

  /* setup C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data    = ab;
  C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2930 #endif
2931 /* ----------------------------------------------- */
2932 #if defined(PETSC_HAVE_ELEMENTAL)
2933 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2934 {
2935   PetscFunctionBegin;
2936   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2937   C->ops->productsymbolic = MatProductSymbolic_AB;
2938   PetscFunctionReturn(0);
2939 }
2940 #endif
2941 
2942 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2943 {
2944   Mat_Product *product = C->product;
2945   Mat         A = product->A,B=product->B;
2946 
2947   PetscFunctionBegin;
2948   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2949     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2950   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2951   C->ops->productsymbolic = MatProductSymbolic_AtB;
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2956 {
2957   Mat_Product    *product = C->product;
2958   const char     *algTypes[2] = {"allgatherv","cyclic"};
2959   PetscInt       alg,nalg = 2;
2960   PetscBool      flg = PETSC_FALSE;
2961 
2962   PetscFunctionBegin;
2963   /* Set default algorithm */
2964   alg = 0; /* default is allgatherv */
2965   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2966   if (flg) {
2967     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2968   }
2969 
2970   /* Get runtime option */
2971   if (product->api_user) {
2972     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2973     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2974     PetscOptionsEnd();
2975   } else {
2976     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2977     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2978     PetscOptionsEnd();
2979   }
2980   if (flg) {
2981     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2982   }
2983 
2984   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2985   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2990 {
2991   Mat_Product    *product = C->product;
2992 
2993   PetscFunctionBegin;
2994   switch (product->type) {
2995 #if defined(PETSC_HAVE_ELEMENTAL)
2996   case MATPRODUCT_AB:
2997     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2998     break;
2999 #endif
3000   case MATPRODUCT_AtB:
3001     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3002     break;
3003   case MATPRODUCT_ABt:
3004     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3005     break;
3006   default:
3007     break;
3008   }
3009   PetscFunctionReturn(0);
3010 }
3011