
/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that represents the locally owned portion of the operator.
              For a MATSEQDENSE matrix it returns the matrix itself.

    Input Parameter:
.      A - the sequential or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

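    Example Usage (a minimal sketch, assuming A is an assembled MATMPIDENSE or MATSEQDENSE matrix):
.vb
      Mat      B;
      PetscInt lda;

      PetscCall(MatDenseGetLocalMatrix(A,&B));
      /* work with the inner sequential matrix directly, e.g. query its leading dimension */
      PetscCall(MatDenseGetLDA(B,&lda));
.ve
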
    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
  PetscScalar    *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
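  /* for rectangular matrices the local row range [rstart,rend) can extend past the
     global column count, so clip to A->cmap->N to touch only true diagonal entries */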
  rend2 = PetscMin(rend,A->cmap->N);
  if (rend2>rstart) {
    for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
    PetscCall(PetscLogFlops(rend2-rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A,&v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
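      /* off-process entries are stashed here and communicated to their owners
         during MatAssemblyBegin()/MatAssemblyEnd() */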
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! We should really check each
     index set to confirm that it is OK; currently only submatrices with the same row
     partitioning as the original matrix are supported! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
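  /* each process holds the full local rows of every column: global column icol[i]
     starts at offset lda*icol[i] in the local array, and, assuming isrow contains
     only locally owned rows (see the limitation above), global row irow[j] lives
     at local index irow[j]-rstart */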
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
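  /* note: MatDiagonalSet() with INSERT_VALUES writes diag into every local diagonal
     entry, not only into the zeroed rows */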
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
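  /* every process needs all N entries of xx for its local row block, so broadcast
     them into the work vector lvec before the local dense multiply */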
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
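  /* the local transpose-multiply produces contributions to all N entries of the
     result, held in lvec; sum them into the distributed vector yy */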
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  radd = A->rmap->rstart*lda; /* column-major offset of global column rstart in the local array */
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAReplaceArray_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix row by row; this is not the most efficient approach, but it is simple */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat,viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

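    /* unlike the left scaling, every process needs all N entries of rr,
       so broadcast them into the work vector lvec first */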
    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v    = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
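      /* one norm = max column sum: accumulate the local column sums, add them
         across processes, then take the maximum over all columns */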
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
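  /* for NORM_2 the local column norms are squared so that summing across processes
     yields the global sums of squares; the square root is taken after the reduction,
     and means are formed by dividing the reduced sums by the global row count */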
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
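  /* the cached column vector is created once with a NULL array; its storage is
     re-pointed at column col of the matrix on each access */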
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A,bind));
  mat->boundtocpu = bind;
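  /* swap the column-vector accessors between the CUDA and host implementations;
     when unbinding, cached cvec/cmat objects of the wrong type are destroyed so
     they are recreated with the CUDA types on next use */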
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat,bind));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

1331 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1332 {
1333   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1334 
1335   PetscFunctionBegin;
1336   PetscCall(MatSetRandom(d->A,rctx));
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1341 {
1342   PetscFunctionBegin;
1343   *missing = PETSC_FALSE;
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1348 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1349 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1350 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1351 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1352 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1353 
1354 /* -------------------------------------------------------------------*/
1355 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1356                                         MatGetRow_MPIDense,
1357                                         MatRestoreRow_MPIDense,
1358                                         MatMult_MPIDense,
1359                                 /*  4*/ MatMultAdd_MPIDense,
1360                                         MatMultTranspose_MPIDense,
1361                                         MatMultTransposeAdd_MPIDense,
1362                                         NULL,
1363                                         NULL,
1364                                         NULL,
1365                                 /* 10*/ NULL,
1366                                         NULL,
1367                                         NULL,
1368                                         NULL,
1369                                         MatTranspose_MPIDense,
1370                                 /* 15*/ MatGetInfo_MPIDense,
1371                                         MatEqual_MPIDense,
1372                                         MatGetDiagonal_MPIDense,
1373                                         MatDiagonalScale_MPIDense,
1374                                         MatNorm_MPIDense,
1375                                 /* 20*/ MatAssemblyBegin_MPIDense,
1376                                         MatAssemblyEnd_MPIDense,
1377                                         MatSetOption_MPIDense,
1378                                         MatZeroEntries_MPIDense,
1379                                 /* 24*/ MatZeroRows_MPIDense,
1380                                         NULL,
1381                                         NULL,
1382                                         NULL,
1383                                         NULL,
1384                                 /* 29*/ MatSetUp_MPIDense,
1385                                         NULL,
1386                                         NULL,
1387                                         MatGetDiagonalBlock_MPIDense,
1388                                         NULL,
1389                                 /* 34*/ MatDuplicate_MPIDense,
1390                                         NULL,
1391                                         NULL,
1392                                         NULL,
1393                                         NULL,
1394                                 /* 39*/ MatAXPY_MPIDense,
1395                                         MatCreateSubMatrices_MPIDense,
1396                                         NULL,
1397                                         MatGetValues_MPIDense,
1398                                         MatCopy_MPIDense,
1399                                 /* 44*/ NULL,
1400                                         MatScale_MPIDense,
1401                                         MatShift_MPIDense,
1402                                         NULL,
1403                                         NULL,
1404                                 /* 49*/ MatSetRandom_MPIDense,
1405                                         NULL,
1406                                         NULL,
1407                                         NULL,
1408                                         NULL,
1409                                 /* 54*/ NULL,
1410                                         NULL,
1411                                         NULL,
1412                                         NULL,
1413                                         NULL,
1414                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1415                                         MatDestroy_MPIDense,
1416                                         MatView_MPIDense,
1417                                         NULL,
1418                                         NULL,
1419                                 /* 64*/ NULL,
1420                                         NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                 /* 69*/ NULL,
1425                                         NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                 /* 74*/ NULL,
1430                                         NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                 /* 79*/ NULL,
1435                                         NULL,
1436                                         NULL,
1437                                         NULL,
1438                                 /* 83*/ MatLoad_MPIDense,
1439                                         NULL,
1440                                         NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                 /* 89*/ NULL,
1445                                         NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                 /* 94*/ NULL,
1450                                         NULL,
1451                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1452                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1453                                         NULL,
1454                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1455                                         NULL,
1456                                         NULL,
1457                                         MatConjugate_MPIDense,
1458                                         NULL,
1459                                 /*104*/ NULL,
1460                                         MatRealPart_MPIDense,
1461                                         MatImaginaryPart_MPIDense,
1462                                         NULL,
1463                                         NULL,
1464                                 /*109*/ NULL,
1465                                         NULL,
1466                                         NULL,
1467                                         MatGetColumnVector_MPIDense,
1468                                         MatMissingDiagonal_MPIDense,
1469                                 /*114*/ NULL,
1470                                         NULL,
1471                                         NULL,
1472                                         NULL,
1473                                         NULL,
1474                                 /*119*/ NULL,
1475                                         NULL,
1476                                         NULL,
1477                                         NULL,
1478                                         NULL,
1479                                 /*124*/ NULL,
1480                                         MatGetColumnReductions_MPIDense,
1481                                         NULL,
1482                                         NULL,
1483                                         NULL,
1484                                 /*129*/ NULL,
1485                                         NULL,
1486                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1487                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1488                                         NULL,
1489                                 /*134*/ NULL,
1490                                         NULL,
1491                                         NULL,
1492                                         NULL,
1493                                         NULL,
1494                                 /*139*/ NULL,
1495                                         NULL,
1496                                         NULL,
1497                                         NULL,
1498                                         NULL,
1499                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1500                                 /*145*/ NULL,
1501                                         NULL,
1502                                         NULL,
1503                                         NULL,
1504                                         NULL
1505 };
1506 
1507 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1508 {
1509   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1510   PetscBool      iscuda;
1511 
1512   PetscFunctionBegin;
1513   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1514   PetscCall(PetscLayoutSetUp(mat->rmap));
1515   PetscCall(PetscLayoutSetUp(mat->cmap));
1516   if (!a->A) {
1517     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1518     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1519     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1520   }
1521   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1522   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1523   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1524   mat->preallocated = PETSC_TRUE;
1525   mat->assembled = PETSC_TRUE;
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1530 {
1531   Mat            B,C;
1532 
1533   PetscFunctionBegin;
1534   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1535   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1536   PetscCall(MatDestroy(&C));
1537   if (reuse == MAT_REUSE_MATRIX) {
1538     C = *newmat;
1539   } else C = NULL;
1540   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1541   PetscCall(MatDestroy(&B));
1542   if (reuse == MAT_INPLACE_MATRIX) {
1543     PetscCall(MatHeaderReplace(A,&C));
1544   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1549 {
1550   Mat            B,C;
1551 
1552   PetscFunctionBegin;
1553   PetscCall(MatDenseGetLocalMatrix(A,&C));
1554   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1555   if (reuse == MAT_REUSE_MATRIX) {
1556     C = *newmat;
1557   } else C = NULL;
1558   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1559   PetscCall(MatDestroy(&B));
1560   if (reuse == MAT_INPLACE_MATRIX) {
1561     PetscCall(MatHeaderReplace(A,&C));
1562   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #if defined(PETSC_HAVE_ELEMENTAL)
1567 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1568 {
1569   Mat            mat_elemental;
1570   PetscScalar    *v;
1571   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1572 
1573   PetscFunctionBegin;
1574   if (reuse == MAT_REUSE_MATRIX) {
1575     mat_elemental = *newmat;
1576     PetscCall(MatZeroEntries(*newmat));
1577   } else {
1578     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1579     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1580     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1581     PetscCall(MatSetUp(mat_elemental));
1582     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1583   }
1584 
1585   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1586   for (i=0; i<N; i++) cols[i] = i;
1587   for (i=0; i<m; i++) rows[i] = rstart + i;
1588 
1589   /* PETSc-Elemental interface uses axpy for setting off-processor entries, so only ADD_VALUES is allowed */
1590   PetscCall(MatDenseGetArray(A,&v));
1591   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1592   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1593   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1594   PetscCall(MatDenseRestoreArray(A,&v));
1595   PetscCall(PetscFree2(rows,cols));
1596 
1597   if (reuse == MAT_INPLACE_MATRIX) {
1598     PetscCall(MatHeaderReplace(A,&mat_elemental));
1599   } else {
1600     *newmat = mat_elemental;
1601   }
1602   PetscFunctionReturn(0);
1603 }
1604 #endif
1605 
1606 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1607 {
1608   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1609 
1610   PetscFunctionBegin;
1611   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1616 {
1617   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1618 
1619   PetscFunctionBegin;
1620   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1625 {
1626   Mat_MPIDense   *mat;
1627   PetscInt       m,nloc,N;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(MatGetSize(inmat,&m,&N));
1631   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1632   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1633     PetscInt sum;
1634 
1635     if (n == PETSC_DECIDE) {
1636       PetscCall(PetscSplitOwnership(comm,&n,&N));
1637     }
1638     /* Check sum(n) = N */
1639     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1640     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1641 
1642     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1643     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1644   }
1645 
1646   /* numeric phase */
1647   mat = (Mat_MPIDense*)(*outmat)->data;
1648   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #if defined(PETSC_HAVE_CUDA)
1653 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1654 {
1655   Mat            B;
1656   Mat_MPIDense   *m;
1657 
1658   PetscFunctionBegin;
1659   if (reuse == MAT_INITIAL_MATRIX) {
1660     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1661   } else if (reuse == MAT_REUSE_MATRIX) {
1662     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1663   }
1664 
1665   B    = *newmat;
1666   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1667   PetscCall(PetscFree(B->defaultvectype));
1668   PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1669   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1670   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1672   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1673   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1674   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1675   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1676   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1677   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1678   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1679   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1680   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1681   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1682   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1683   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1684   m    = (Mat_MPIDense*)(B)->data;
1685   if (m->A) {
1686     PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1687   }
1688   B->ops->bindtocpu = NULL;
1689   B->offloadmask    = PETSC_OFFLOAD_CPU;
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1694 {
1695   Mat            B;
1696   Mat_MPIDense   *m;
1697 
1698   PetscFunctionBegin;
1699   if (reuse == MAT_INITIAL_MATRIX) {
1700     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1701   } else if (reuse == MAT_REUSE_MATRIX) {
1702     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1703   }
1704 
1705   B    = *newmat;
1706   PetscCall(PetscFree(B->defaultvectype));
1707   PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1708   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1709   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1710   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1711   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1712   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1713   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1714   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1715   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1716   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1717   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1718   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1719   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1720   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1721   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1722   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1723   m    = (Mat_MPIDense*)(B->data);
1724   if (m->A) {
1725     PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1726     B->offloadmask = PETSC_OFFLOAD_BOTH;
1727   } else {
1728     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1729   }
1730   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1731 
1732   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1733   PetscFunctionReturn(0);
1734 }
1735 #endif
1736 
1737 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1738 {
1739   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1740   PetscInt       lda;
1741 
1742   PetscFunctionBegin;
1743   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1744   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1745   if (!a->cvec) {
1746     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1747   }
1748   a->vecinuse = col + 1;
1749   PetscCall(MatDenseGetLDA(a->A,&lda));
1750   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1751   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1752   *v   = a->cvec;
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1757 {
1758   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1759 
1760   PetscFunctionBegin;
1761   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1762   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1763   a->vecinuse = 0;
1764   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1765   PetscCall(VecResetArray(a->cvec));
1766   *v   = NULL;
1767   PetscFunctionReturn(0);
1768 }
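
/*
   Usage sketch (added for illustration, not part of the PETSc source): the accessors above back
   the public MatDenseGetColumnVec()/MatDenseRestoreColumnVec() pair, which lends out a single
   cached Vec wrapping one column of the local storage:

     Mat A;   // an assembled MATMPIDENSE matrix
     Vec col;
     PetscCall(MatDenseGetColumnVec(A,0,&col));     // borrow column 0 as a Vec
     PetscCall(VecScale(col,2.0));                  // use it like any other Vec
     PetscCall(MatDenseRestoreColumnVec(A,0,&col)); // restore before any other access
*/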
1769 
1770 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1771 {
1772   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1773   PetscInt       lda;
1774 
1775   PetscFunctionBegin;
1776   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1777   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1778   if (!a->cvec) {
1779     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1780   }
1781   a->vecinuse = col + 1;
1782   PetscCall(MatDenseGetLDA(a->A,&lda));
1783   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1784   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1785   PetscCall(VecLockReadPush(a->cvec));
1786   *v   = a->cvec;
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1791 {
1792   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1796   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1797   a->vecinuse = 0;
1798   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1799   PetscCall(VecLockReadPop(a->cvec));
1800   PetscCall(VecResetArray(a->cvec));
1801   *v   = NULL;
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1806 {
1807   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1808   PetscInt       lda;
1809 
1810   PetscFunctionBegin;
1811   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1812   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1813   if (!a->cvec) {
1814     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1815   }
1816   a->vecinuse = col + 1;
1817   PetscCall(MatDenseGetLDA(a->A,&lda));
1818   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1819   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1820   *v   = a->cvec;
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1825 {
1826   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1827 
1828   PetscFunctionBegin;
1829   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1830   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1831   a->vecinuse = 0;
1832   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1833   PetscCall(VecResetArray(a->cvec));
1834   *v   = NULL;
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v)
1839 {
1840   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1841   Mat_MPIDense *c;
1842   MPI_Comm     comm;
1843   PetscInt     pbegin, pend;
1844 
1845   PetscFunctionBegin;
1846   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1847   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1848   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1849   pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart);
1850   pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart));
1851   if (!a->cmat) {
1852     PetscCall(MatCreate(comm,&a->cmat));
1853     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1854     PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
1855     if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1856     else {
1857       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
1858       PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
1859       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1860     }
1861     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1862     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1863   } else {
1864     PetscBool same = (PetscBool)(rend-rbegin == a->cmat->rmap->N);
1865     if (same && a->cmat->rmap->N != A->rmap->N) {
1866       same = (PetscBool)(pend-pbegin == a->cmat->rmap->n);
1867       PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1868     }
1869     if (!same) {
1870       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1871       PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap));
1872       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
1873       PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
1874       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1875     }
1876     if (cend-cbegin != a->cmat->cmap->N) {
1877       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1878       PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
1879       PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1880       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1881     }
1882   }
1883   c = (Mat_MPIDense*)a->cmat->data;
1884   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1885   PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A));
1886   a->cmat->preallocated = PETSC_TRUE;
1887   a->cmat->assembled = PETSC_TRUE;
1888   a->matinuse = cbegin + 1;
1889   *v = a->cmat;
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1894 {
1895   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1896   Mat_MPIDense *c;
1897 
1898   PetscFunctionBegin;
1899   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1900   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1901   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1902   a->matinuse = 0;
1903   c = (Mat_MPIDense*)a->cmat->data;
1904   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1905   *v = NULL;
1906   PetscFunctionReturn(0);
1907 }
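
/*
   Usage sketch (added for illustration, not part of the PETSc source): MatDenseGetSubMatrix()
   hands out one cached view (a->cmat), so a submatrix must be restored before another can be
   requested:

     Mat      A, sub;   // A is an assembled MATMPIDENSE matrix
     PetscInt M, N;
     PetscCall(MatGetSize(A,&M,&N));
     PetscCall(MatDenseGetSubMatrix(A,0,M,0,N/2,&sub)); // view of the first N/2 columns
     PetscCall(MatScale(sub,0.5));                      // operates on A's storage in place
     PetscCall(MatDenseRestoreSubMatrix(A,&sub));
*/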
1908 
1909 /*MC
1910    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1911 
1912    Options Database Keys:
1913 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1914 
1915   Level: beginner
1916 
1917 .seealso: `MatCreateDense()`
1918 
1919 M*/
1920 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1921 {
1922   Mat_MPIDense   *a;
1923 
1924   PetscFunctionBegin;
1925   PetscCall(PetscNewLog(mat,&a));
1926   mat->data = (void*)a;
1927   PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1928 
1929   mat->insertmode = NOT_SET_VALUES;
1930 
1931   /* build cache for off array entries formed */
1932   a->donotstash = PETSC_FALSE;
1933 
1934   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1935 
1936   /* stuff used for matrix vector multiply */
1937   a->lvec        = NULL;
1938   a->Mvctx       = NULL;
1939   a->roworiented = PETSC_TRUE;
1940 
1941   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1942   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1943   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1944   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1945   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1946   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1947   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1948   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1949   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1950   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1951   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1952   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1953   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1954   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1955   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1956   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1957   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1958   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1959   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1960   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1961   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1962 #if defined(PETSC_HAVE_ELEMENTAL)
1963   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1964 #endif
1965 #if defined(PETSC_HAVE_SCALAPACK)
1966   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1967 #endif
1968 #if defined(PETSC_HAVE_CUDA)
1969   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1970 #endif
1971   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1972   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1973   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1974 #if defined(PETSC_HAVE_CUDA)
1975   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1976   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1977 #endif
1978 
1979   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1980   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1981   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1982   PetscFunctionReturn(0);
1983 }
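
/*
   Usage sketch (added for illustration, not part of the PETSc source): selecting the type
   programmatically instead of through -mat_type mpidense:

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100));
     PetscCall(MatSetType(A,MATMPIDENSE));
     PetscCall(MatSetUp(A));
*/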
1984 
1985 /*MC
1986    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1987 
1988    Options Database Keys:
1989 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1990 
1991   Level: beginner
1992 
1993 .seealso: `MATDENSE`, `MATDENSECUDA`, `MATMPIDENSE`
1994 
1995 M*/
1996 #if defined(PETSC_HAVE_CUDA)
1997 #include <petsc/private/deviceimpl.h>
1998 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1999 {
2000   PetscFunctionBegin;
2001   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
2002   PetscCall(MatCreate_MPIDense(B));
2003   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
2004   PetscFunctionReturn(0);
2005 }
2006 #endif
2007 
2008 /*MC
2009    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2010 
2011    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2012    and MATMPIDENSE otherwise.
2013 
2014    Options Database Keys:
2015 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2016 
2017   Level: beginner
2018 
2019 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
2020 M*/
2021 
2022 /*MC
2023    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2024 
2025    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2026    and MATMPIDENSECUDA otherwise.
2027 
2028    Options Database Keys:
2029 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2030 
2031   Level: beginner
2032 
2033 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
2034 M*/
2035 
2036 /*@C
2037    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2038 
2039    Collective
2040 
2041    Input Parameters:
2042 +  B - the matrix
2043 -  data - optional location of matrix data.  Set data=NULL for PETSc
2044    to control all matrix memory allocation.
2045 
2046    Notes:
2047    The dense format is fully compatible with standard Fortran 77
2048    storage by columns.
2049 
2050    The data input variable is intended primarily for Fortran programmers
2051    who wish to allocate their own matrix memory space.  Most users should
2052    set data=NULL.
2053 
2054    Level: intermediate
2055 
2056 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2057 @*/
2058 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2059 {
2060   PetscFunctionBegin;
2061   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2062   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
2063   PetscFunctionReturn(0);
2064 }
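
/*
   Usage sketch (added for illustration, not part of the PETSc source): passing data=NULL lets
   PETSc allocate the local column-major storage:

     Mat B;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&B));
     PetscCall(MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,100,100));
     PetscCall(MatSetType(B,MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(B,NULL));
*/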
2065 
2066 /*@
2067    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2068    array provided by the user. This is useful to avoid copying an array
2069    into a matrix.
2070 
2071    Not Collective
2072 
2073    Input Parameters:
2074 +  mat - the matrix
2075 -  array - the array in column major order
2076 
2077    Notes:
2078    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2079    freed when the matrix is destroyed.
2080 
2081    Level: developer
2082 
2083 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2084 
2085 @*/
2086 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2087 {
2088   PetscFunctionBegin;
2089   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2090   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2091   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2092 #if defined(PETSC_HAVE_CUDA)
2093   mat->offloadmask = PETSC_OFFLOAD_CPU;
2094 #endif
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 /*@
2099    MatDenseResetArray - Resets the matrix array to the array it held before the call to MatDensePlaceArray()
2100 
2101    Not Collective
2102 
2103    Input Parameter:
2104 .  mat - the matrix
2105 
2106    Notes:
2107    You can only call this after a call to MatDensePlaceArray()
2108 
2109    Level: developer
2110 
2111 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2112 
2113 @*/
2114 PetscErrorCode  MatDenseResetArray(Mat mat)
2115 {
2116   PetscFunctionBegin;
2117   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2118   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
2119   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2120   PetscFunctionReturn(0);
2121 }
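
/*
   Usage sketch (added for illustration, not part of the PETSc source): lending user memory to
   the matrix and taking it back. It assumes the buffer matches the local storage shape of a
   MATMPIDENSE matrix (local rows x global columns, contiguous, i.e. lda equal to the local
   row count):

     PetscInt     mloc, Nglob;
     PetscScalar *work;
     PetscCall(MatGetLocalSize(A,&mloc,NULL));
     PetscCall(MatGetSize(A,NULL,&Nglob));
     PetscCall(PetscMalloc1((size_t)mloc*Nglob,&work));
     PetscCall(MatDensePlaceArray(A,work)); // A now reads and writes `work`
     PetscCall(MatZeroEntries(A));          // the writes land in `work`
     PetscCall(MatDenseResetArray(A));      // A points back at its original array
     PetscCall(PetscFree(work));            // the caller keeps ownership
*/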
2122 
2123 /*@
2124    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2125    array provided by the user. This is useful to avoid copying an array
2126    into a matrix.
2127 
2128    Not Collective
2129 
2130    Input Parameters:
2131 +  mat - the matrix
2132 -  array - the array in column major order
2133 
2134    Notes:
2135    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2136    freed by the user. It will be freed when the matrix is destroyed.
2137 
2138    Level: developer
2139 
2140 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2141 @*/
2142 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2143 {
2144   PetscFunctionBegin;
2145   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2146   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2147   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2148 #if defined(PETSC_HAVE_CUDA)
2149   mat->offloadmask = PETSC_OFFLOAD_CPU;
2150 #endif
2151   PetscFunctionReturn(0);
2152 }
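
/*
   Usage sketch (added for illustration, not part of the PETSc source): unlike
   MatDensePlaceArray(), ownership of the buffer transfers to the matrix, so it must come from
   PetscMalloc() and the caller must not free it (sizing as in the placement example above):

     PetscScalar *newstorage;
     PetscCall(PetscMalloc1((size_t)mloc*Nglob,&newstorage));
     PetscCall(MatDenseReplaceArray(A,newstorage)); // freed when A is destroyed
*/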
2153 
2154 #if defined(PETSC_HAVE_CUDA)
2155 /*@C
2156    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2157    array provided by the user. This is useful to avoid copying an array
2158    into a matrix.
2159 
2160    Not Collective
2161 
2162    Input Parameters:
2163 +  mat - the matrix
2164 -  array - the array in column major order
2165 
2166    Notes:
2167    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2168    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2169 
2170    Level: developer
2171 
2172 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2173 @*/
2174 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2175 {
2176   PetscFunctionBegin;
2177   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2178   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2179   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2180   mat->offloadmask = PETSC_OFFLOAD_GPU;
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 /*@C
2185    MatDenseCUDAResetArray - Resets the matrix array to the array it held before the call to MatDenseCUDAPlaceArray()
2186 
2187    Not Collective
2188 
2189    Input Parameter:
2190 .  mat - the matrix
2191 
2192    Notes:
2193    You can only call this after a call to MatDenseCUDAPlaceArray()
2194 
2195    Level: developer
2196 
2197 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2198 
2199 @*/
2200 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2201 {
2202   PetscFunctionBegin;
2203   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2204   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2205   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2206   PetscFunctionReturn(0);
2207 }
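
/*
   Usage sketch (added for illustration, not part of the PETSc source): the device-side analogue
   of MatDensePlaceArray()/MatDenseResetArray(), assuming a cudaMalloc'd buffer sized like the
   matrix's local storage:

     PetscScalar *d_work;
     PetscCallCUDA(cudaMalloc((void**)&d_work,(size_t)mloc*Nglob*sizeof(PetscScalar)));
     PetscCall(MatDenseCUDAPlaceArray(A,d_work)); // A now uses the device buffer
     PetscCall(MatZeroEntries(A));
     PetscCall(MatDenseCUDAResetArray(A));        // restore the original device array
     PetscCallCUDA(cudaFree(d_work));             // the caller keeps ownership
*/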
2208 
2209 /*@C
2210    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2211    array provided by the user. This is useful to avoid copying an array
2212    into a matrix.
2213 
2214    Not Collective
2215 
2216    Input Parameters:
2217 +  mat - the matrix
2218 -  array - the array in column major order
2219 
2220    Notes:
2221    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2222    The memory passed in CANNOT be freed by the user. It will be freed
2223    when the matrix is destroyed. The array should respect the matrix leading dimension.
2224 
2225    Level: developer
2226 
2227 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2228 @*/
2229 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2230 {
2231   PetscFunctionBegin;
2232   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2233   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2234   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2235   mat->offloadmask = PETSC_OFFLOAD_GPU;
2236   PetscFunctionReturn(0);
2237 }
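
/*
   Usage sketch (added for illustration, not part of the PETSc source): ownership of the device
   buffer transfers to the matrix, so the caller must not free it; it is assumed here to come
   from cudaMalloc():

     PetscScalar *d_new;
     PetscCallCUDA(cudaMalloc((void**)&d_new,(size_t)mloc*Nglob*sizeof(PetscScalar)));
     PetscCall(MatDenseCUDAReplaceArray(A,d_new)); // freed when A is destroyed
*/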
2238 
2239 /*@C
2240    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2241 
2242    Not Collective
2243 
2244    Input Parameter:
2245 .  A - the matrix
2246 
2247    Output Parameter:
2248 .  array - the GPU array in column major order
2249 
2250    Notes:
2251    The data on the GPU may not reflect operations previously performed on the CPU, so treat the buffer as uninitialized; if you need up-to-date data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2252 
2253    Level: developer
2254 
2255 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2256 @*/
2257 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2258 {
2259   PetscFunctionBegin;
2260   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2261   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2262   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 /*@C
2267    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2268 
2269    Not Collective
2270 
2271    Input Parameters:
2272 +  A - the matrix
2273 -  array - the GPU array in column major order
2274 
2277    Level: developer
2278 
2279 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2280 @*/
2281 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2282 {
2283   PetscFunctionBegin;
2284   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2285   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2286   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2287   A->offloadmask = PETSC_OFFLOAD_GPU;
2288   PetscFunctionReturn(0);
2289 }
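
/*
   Usage sketch (added for illustration, not part of the PETSc source): write-only access skips
   any host-to-device synchronization, so it suits code that overwrites the whole buffer:

     PetscScalar *d_a;
     PetscInt     lda, Nglob;
     PetscCall(MatDenseGetLDA(A,&lda));
     PetscCall(MatGetSize(A,NULL,&Nglob));
     PetscCall(MatDenseCUDAGetArrayWrite(A,&d_a));
     PetscCallCUDA(cudaMemset(d_a,0,(size_t)lda*Nglob*sizeof(PetscScalar))); // zero the local block
     PetscCall(MatDenseCUDARestoreArrayWrite(A,&d_a));
*/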
2290 
2291 /*@C
2292    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2293 
2294    Not Collective
2295 
2296    Input Parameter:
2297 .  A - the matrix
2298 
2299    Output Parameter:
2300 .  array - the GPU array in column major order
2301 
2302    Notes:
2303    Data may be copied to the GPU to reflect operations previously done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2304 
2305    Level: developer
2306 
2307 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2308 @*/
2309 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2310 {
2311   PetscFunctionBegin;
2312   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2313   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 /*@C
2318    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2319 
2320    Not Collective
2321 
2322    Input Parameters:
2323 +  A - the matrix
2324 -  array - the GPU array in column major order
2325 
2326    Notes:
2327    Data may be copied to the GPU to reflect operations previously done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2328 
2329    Level: developer
2330 
2331 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2332 @*/
2333 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2334 {
2335   PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2336   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2337   PetscFunctionReturn(0);
2338 }
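
/*
   Usage sketch (added for illustration, not part of the PETSc source): read-only access may
   copy host data to the device first, and does not mark the matrix as modified:

     const PetscScalar *d_a;
     PetscScalar        first;
     PetscCall(MatDenseCUDAGetArrayRead(A,&d_a));
     PetscCallCUDA(cudaMemcpy(&first,d_a,sizeof(PetscScalar),cudaMemcpyDeviceToHost)); // peek at one entry
     PetscCall(MatDenseCUDARestoreArrayRead(A,&d_a));
*/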
2339 
2340 /*@C
2341    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2342 
2343    Not Collective
2344 
2345    Input Parameter:
2346 .  A - the matrix
2347 
2348    Output Parameter:
2349 .  array - the GPU array in column major order
2350 
2351    Notes:
2352    Data may be copied to the GPU to reflect operations previously done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite(); for read-only access, use MatDenseCUDAGetArrayRead().
2353 
2354    Level: developer
2355 
2356 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2357 @*/
2358 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2359 {
2360   PetscFunctionBegin;
2361   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2362   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2363   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 /*@C
2368    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2369 
2370    Not Collective
2371 
2372    Input Parameters:
2373 +  A - the matrix
2374 -  array - the GPU array in column major order
2375 
2378    Level: developer
2379 
2380 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2381 @*/
2382 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2383 {
2384   PetscFunctionBegin;
2385   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2386   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2387   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2388   A->offloadmask = PETSC_OFFLOAD_GPU;
2389   PetscFunctionReturn(0);
2390 }
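
/*
   Usage sketch (added for illustration, not part of the PETSc source): read-write access
   synchronizes the device copy first and marks the GPU data authoritative on restore:

     PetscScalar *d_a;
     PetscCall(MatDenseCUDAGetArray(A,&d_a));
     // pass d_a to a user CUDA kernel or cuBLAS call that updates the entries in place
     PetscCall(MatDenseCUDARestoreArray(A,&d_a));
*/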
2391 #endif
2392 
2393 /*@C
2394    MatCreateDense - Creates a matrix in dense format.
2395 
2396    Collective
2397 
2398    Input Parameters:
2399 +  comm - MPI communicator
2400 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2401 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2402 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2403 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2404 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2405    to control all matrix memory allocation.
2406 
2407    Output Parameter:
2408 .  A - the matrix
2409 
2410    Notes:
2411    The dense format is fully compatible with standard Fortran 77
2412    storage by columns.
2413    Note that, although local portions of the matrix are stored in column-major
2414    order, the matrix is partitioned across MPI ranks by row.
2415 
2416    The data input variable is intended primarily for Fortran programmers
2417    who wish to allocate their own matrix memory space.  Most users should
2418    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2419 
2420    The user MUST specify either the local or global matrix dimensions
2421    (possibly both).
2422 
2423    Level: intermediate
2424 
2425 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2426 @*/
2427 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2428 {
2429   PetscFunctionBegin;
2430   PetscCall(MatCreate(comm,A));
2431   PetscCall(MatSetSizes(*A,m,n,M,N));
2432   PetscCall(MatSetType(*A,MATDENSE));
2433   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2434   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2435   PetscFunctionReturn(0);
2436 }
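
/*
   Usage sketch (added for illustration, not part of the PETSc source): a 100 x 50 dense matrix
   with PETSc managing the storage and choosing the local row/column split:

     Mat A;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A));
     PetscCall(MatSetValue(A,0,0,1.0,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatDestroy(&A));
*/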
2437 
2438 #if defined(PETSC_HAVE_CUDA)
2439 /*@C
2440    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2441 
2442    Collective
2443 
2444    Input Parameters:
2445 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2446 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2447 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2448 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2449 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2450 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2451    to control matrix memory allocation.
2452 
2453    Output Parameter:
2454 .  A - the matrix
2455 
2458    Level: intermediate
2459 
2460 .seealso: `MatCreate()`, `MatCreateDense()`
2461 @*/
2462 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2463 {
2464   PetscFunctionBegin;
2465   PetscCall(MatCreate(comm,A));
2466   PetscValidLogicalCollectiveBool(*A,!!data,6);
2467   PetscCall(MatSetSizes(*A,m,n,M,N));
2468   PetscCall(MatSetType(*A,MATDENSECUDA));
2469   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2470   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2471   PetscFunctionReturn(0);
2472 }
2473 #endif
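
/*
   Usage sketch (added for illustration, not part of the PETSc source): same calling sequence as
   MatCreateDense(); a non-NULL data argument is expected to be a device pointer:

     Mat A;
     PetscCall(MatCreateDenseCUDA(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A));
*/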
2474 
2475 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2476 {
2477   Mat            mat;
2478   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2479 
2480   PetscFunctionBegin;
2481   *newmat = NULL;
2482   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2483   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2484   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2485   a       = (Mat_MPIDense*)mat->data;
2486 
2487   mat->factortype   = A->factortype;
2488   mat->assembled    = PETSC_TRUE;
2489   mat->preallocated = PETSC_TRUE;
2490 
2491   mat->insertmode = NOT_SET_VALUES;
2492   a->donotstash   = oldmat->donotstash;
2493 
2494   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2495   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2496 
2497   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2498   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2499 
2500   *newmat = mat;
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2505 {
2506   PetscBool      isbinary;
2507 #if defined(PETSC_HAVE_HDF5)
2508   PetscBool      ishdf5;
2509 #endif
2510 
2511   PetscFunctionBegin;
2512   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2513   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2514   /* force binary viewer to load .info file if it has not yet done so */
2515   PetscCall(PetscViewerSetUp(viewer));
2516   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2517 #if defined(PETSC_HAVE_HDF5)
2518   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2519 #endif
2520   if (isbinary) {
2521     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2522 #if defined(PETSC_HAVE_HDF5)
2523   } else if (ishdf5) {
2524     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2525 #endif
2526   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2531 {
2532   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2533   Mat            a,b;
2534 
2535   PetscFunctionBegin;
2536   a    = matA->A;
2537   b    = matB->A;
2538   PetscCall(MatEqual(a,b,flag));
2539   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2544 {
2545   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2546 
2547   PetscFunctionBegin;
2548   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2549   PetscCall(MatDestroy(&atb->atb));
2550   PetscCall(PetscFree(atb));
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2555 {
2556   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2557 
2558   PetscFunctionBegin;
2559   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2560   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2561   PetscCall(PetscFree(abt));
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2566 {
2567   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2568   Mat_TransMatMultDense *atb;
2569   MPI_Comm              comm;
2570   PetscMPIInt           size,*recvcounts;
2571   PetscScalar           *carray,*sendbuf;
2572   const PetscScalar     *atbarray;
2573   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2574   const PetscInt        *ranges;
2575 
2576   PetscFunctionBegin;
2577   MatCheckProduct(C,3);
2578   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2579   atb = (Mat_TransMatMultDense *)C->product->data;
2580   recvcounts = atb->recvcounts;
2581   sendbuf = atb->sendbuf;
2582 
2583   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2584   PetscCallMPI(MPI_Comm_size(comm,&size));
2585 
2586   /* compute atb->atb = aseq^T * bseq */
2587   PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2588 
2589   PetscCall(MatGetOwnershipRanges(C,&ranges));
2590 
2591   /* pack atb->atb into sendbuf, ordered by destination rank, for the reduce-scatter */
2592   PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
2593   PetscCall(MatDenseGetLDA(atb->atb,&lda));
2594   for (proc=0, k=0; proc<size; proc++) {
2595     for (j=0; j<cN; j++) {
2596       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2597     }
2598   }
2599   PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2600 
2601   /* reduce-scatter: sum the contributions from all ranks into the locally owned rows of C */
2602   PetscCall(MatDenseGetArrayWrite(c->A,&carray));
2603   PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2604   PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
2605   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2606   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2607   PetscFunctionReturn(0);
2608 }
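
/*
   The routine above forms the local product aseq^T * bseq on every rank and
   sums the distributed contributions with MPI_Reduce_scatter, leaving each
   rank with its owned rows of C = A^T B. User-level sketch (illustrative):

     Mat C;
     PetscCall(MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     // later, with unchanged layouts, reuse the product data:
     PetscCall(MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
*/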
2609 
2610 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2611 {
2612   MPI_Comm              comm;
2613   PetscMPIInt           size;
2614   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2615   Mat_TransMatMultDense *atb;
2616   PetscBool             cisdense;
2617   PetscInt              i;
2618   const PetscInt        *ranges;
2619 
2620   PetscFunctionBegin;
2621   MatCheckProduct(C,3);
2622   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2623   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2624   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2625     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2626   }
2627 
2628   /* create matrix product C */
2629   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2630   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2631   if (!cisdense) {
2632     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2633   }
2634   PetscCall(MatSetUp(C));
2635 
2636   /* create data structure for reuse of C */
2637   PetscCallMPI(MPI_Comm_size(comm,&size));
2638   PetscCall(PetscNew(&atb));
2639   cM   = C->rmap->N;
2640   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2641   PetscCall(MatGetOwnershipRanges(C,&ranges));
2642   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2643 
2644   C->product->data    = atb;
2645   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2646   PetscFunctionReturn(0);
2647 }
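
/*
   Sizing example for the reuse data above (illustrative): with size = 2 ranks
   and C of global size cM x cN = 4 x 3 split into row ranges [0,2) and [2,4),
   ranges = {0,2,4}, so every rank allocates a sendbuf of cM*cN = 12 scalars
   and recvcounts = {2*3, 2*3} = {6, 6} for the reduce-scatter.
*/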
2648 
2649 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2650 {
2651   MPI_Comm              comm;
2652   PetscMPIInt           i, size;
2653   PetscInt              maxRows, bufsiz;
2654   PetscMPIInt           tag;
2655   PetscInt              alg;
2656   Mat_MatTransMultDense *abt;
2657   Mat_Product           *product = C->product;
2658   PetscBool             flg;
2659 
2660   PetscFunctionBegin;
2661   MatCheckProduct(C,4);
2662   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2663   /* check local size of A and B */
2664   PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);
2665 
2666   PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
2667   alg  = flg ? 0 : 1;
2668 
2669   /* set up matrix product C */
2670   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
2671   PetscCall(MatSetType(C,MATMPIDENSE));
2672   PetscCall(MatSetUp(C));
2673   PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));
2674 
2675   /* create data structure for reuse of C */
2676   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2677   PetscCallMPI(MPI_Comm_size(comm,&size));
2678   PetscCall(PetscNew(&abt));
2679   abt->tag = tag;
2680   abt->alg = alg;
2681   switch (alg) {
2682   case 1: /* alg: "cyclic" */
2683     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2684     bufsiz = A->cmap->N * maxRows;
2685     PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
2686     break;
2687   default: /* alg: "allgatherv" */
2688     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2689     PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
2690     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2691     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2692     break;
2693   }
2694 
2695   C->product->data    = abt;
2696   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2697   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2698   PetscFunctionReturn(0);
2699 }
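
/*
   Algorithm selection sketch (illustrative): the "cyclic" branch (alg = 1)
   can be requested at runtime with either

     -matmattransmult_mpidense_mpidense_via cyclic    (MatMatTransposeMult() interface)
     -mat_product_algorithm cyclic                    (MatProduct interface)

   while the default "allgatherv" gathers all of B^T on every rank; see
   MatProductSetFromOptions_MPIDense_ABt() below.
*/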
2700 
2701 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2702 {
2703   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2704   Mat_MatTransMultDense *abt;
2705   MPI_Comm              comm;
2706   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2707   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2708   PetscInt              i,cK=A->cmap->N,k,j,bn;
2709   PetscScalar           _DOne=1.0,_DZero=0.0;
2710   const PetscScalar     *av,*bv;
2711   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2712   MPI_Request           reqs[2];
2713   const PetscInt        *ranges;
2714 
2715   PetscFunctionBegin;
2716   MatCheckProduct(C,3);
2717   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2718   abt  = (Mat_MatTransMultDense*)C->product->data;
2719   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2720   PetscCallMPI(MPI_Comm_rank(comm,&rank));
2721   PetscCallMPI(MPI_Comm_size(comm,&size));
2722   PetscCall(MatDenseGetArrayRead(a->A,&av));
2723   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2724   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2725   PetscCall(MatDenseGetLDA(a->A,&i));
2726   PetscCall(PetscBLASIntCast(i,&alda));
2727   PetscCall(MatDenseGetLDA(b->A,&i));
2728   PetscCall(PetscBLASIntCast(i,&blda));
2729   PetscCall(MatDenseGetLDA(c->A,&i));
2730   PetscCall(PetscBLASIntCast(i,&clda));
2731   PetscCall(MatGetOwnershipRanges(B,&ranges));
2732   bn   = B->rmap->n;
2733   if (blda == bn) {
2734     sendbuf = (PetscScalar*)bv;
2735   } else {
2736     sendbuf = abt->buf[0];
2737     for (k = 0, i = 0; i < cK; i++) {
2738       for (j = 0; j < bn; j++, k++) {
2739         sendbuf[k] = bv[i * blda + j];
2740       }
2741     }
2742   }
2743   if (size > 1) {
2744     sendto = (rank + size - 1) % size;
2745     recvfrom = (rank + size + 1) % size;
2746   } else {
2747     sendto = recvfrom = 0;
2748   }
2749   PetscCall(PetscBLASIntCast(cK,&ck));
2750   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2751   recvisfrom = rank;
2752   for (i = 0; i < size; i++) {
2753     /* we have finished receiving and sending; the buffers can now be read/modified */
2754     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process the next recvbuf will originate from */
2755     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2756 
2757     if (nextrecvisfrom != rank) {
2758       /* start the cyclic transfer: send from sendbuf, receive into recvbuf (which becomes the next sendbuf) */
2759       sendsiz = cK * bn;
2760       recvsiz = cK * nextbn;
2761       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2762       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2763       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2764     }
2765 
2766     /* local GEMM: aseq * sendbuf^T fills the columns of C corresponding to the rows of B owned by rank recvisfrom */
2767     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2768     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2769 
2770     if (nextrecvisfrom != rank) {
2771       /* wait for the send and receive to complete before the buffers are reused */
2772       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2773     }
2774     bn = nextbn;
2775     recvisfrom = nextrecvisfrom;
2776     sendbuf = recvbuf;
2777   }
2778   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2779   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2780   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2781   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2782   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2783   PetscFunctionReturn(0);
2784 }
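
/*
   The loop above is a ring (systolic) exchange: each rank ships its current
   panel of B to rank-1, receives the next panel from rank+1, and overlaps the
   transfer with the local GEMM. A minimal standalone sketch of the same
   pattern (illustrative; comm, tag, n, sendbuf, and recvbuf are assumed to be
   set up by the caller, and the guard that skips the final transfer is
   omitted):

     int rank,size;
     MPI_Comm_rank(comm,&rank);
     MPI_Comm_size(comm,&size);
     int sendto = (rank + size - 1) % size, recvfrom = (rank + 1) % size;
     for (int i = 0; i < size; i++) {
       MPI_Request reqs[2];
       MPI_Isend(sendbuf,n,MPI_DOUBLE,sendto,tag,comm,&reqs[0]);
       MPI_Irecv(recvbuf,n,MPI_DOUBLE,recvfrom,tag,comm,&reqs[1]);
       // ... compute with the panel currently in sendbuf ...
       MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);
       double *tmp = sendbuf; sendbuf = recvbuf; recvbuf = tmp;
     }
*/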
2785 
2786 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2787 {
2788   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2789   Mat_MatTransMultDense *abt;
2790   MPI_Comm              comm;
2791   PetscMPIInt           size;
2792   PetscScalar           *cv, *sendbuf, *recvbuf;
2793   const PetscScalar     *av,*bv;
2794   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2795   PetscScalar           _DOne=1.0,_DZero=0.0;
2796   PetscBLASInt          cm, cn, ck, alda, clda;
2797 
2798   PetscFunctionBegin;
2799   MatCheckProduct(C,3);
2800   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2801   abt  = (Mat_MatTransMultDense*)C->product->data;
2802   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2803   PetscCallMPI(MPI_Comm_size(comm,&size));
2804   PetscCall(MatDenseGetArrayRead(a->A,&av));
2805   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2806   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2807   PetscCall(MatDenseGetLDA(a->A,&i));
2808   PetscCall(PetscBLASIntCast(i,&alda));
2809   PetscCall(MatDenseGetLDA(b->A,&blda));
2810   PetscCall(MatDenseGetLDA(c->A,&i));
2811   PetscCall(PetscBLASIntCast(i,&clda));
2812   /* copy transpose of B into buf[0] */
2813   bn      = B->rmap->n;
2814   sendbuf = abt->buf[0];
2815   recvbuf = abt->buf[1];
2816   for (k = 0, j = 0; j < bn; j++) {
2817     for (i = 0; i < cK; i++, k++) {
2818       sendbuf[k] = bv[i * blda + j];
2819     }
2820   }
2821   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2822   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2823   PetscCall(PetscBLASIntCast(cK,&ck));
2824   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2825   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2826   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2827   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2829   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2830   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2831   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2836 {
2837   Mat_MatTransMultDense *abt;
2838 
2839   PetscFunctionBegin;
2840   MatCheckProduct(C,3);
2841   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2842   abt = (Mat_MatTransMultDense*)C->product->data;
2843   switch (abt->alg) {
2844   case 1:
2845     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2846     break;
2847   default:
2848     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2849     break;
2850   }
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2855 {
2856   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2857 
2858   PetscFunctionBegin;
2859   PetscCall(MatDestroy(&ab->Ce));
2860   PetscCall(MatDestroy(&ab->Ae));
2861   PetscCall(MatDestroy(&ab->Be));
2862   PetscCall(PetscFree(ab));
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #if defined(PETSC_HAVE_ELEMENTAL)
2867 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2868 {
2869   Mat_MatMultDense *ab;
2870 
2871   PetscFunctionBegin;
2872   MatCheckProduct(C,3);
2873   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2874   ab   = (Mat_MatMultDense*)C->product->data;
2875   PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2876   PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2877   PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2878   PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2883 {
2884   Mat              Ae,Be,Ce;
2885   Mat_MatMultDense *ab;
2886 
2887   PetscFunctionBegin;
2888   MatCheckProduct(C,4);
2889   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2890   /* check local size of A and B */
2891   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2892     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2893   }
2894 
2895   /* create elemental matrices Ae and Be */
2896   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2897   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2898   PetscCall(MatSetType(Ae,MATELEMENTAL));
2899   PetscCall(MatSetUp(Ae));
2900   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2901 
2902   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2903   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2904   PetscCall(MatSetType(Be,MATELEMENTAL));
2905   PetscCall(MatSetUp(Be));
2906   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2907 
2908   /* compute symbolic Ce = Ae*Be */
2909   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2910   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2911 
2912   /* set up C */
2913   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2914   PetscCall(MatSetType(C,MATDENSE));
2915   PetscCall(MatSetUp(C));
2916 
2917   /* create data structure for reuse of C */
2918   PetscCall(PetscNew(&ab));
2919   ab->Ae = Ae;
2920   ab->Be = Be;
2921   ab->Ce = Ce;
2922 
2923   C->product->data    = ab;
2924   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2925   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2926   PetscFunctionReturn(0);
2927 }
2928 #endif
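
/*
   User-level sketch for the Elemental-backed A*B path above (illustrative;
   requires a PETSc build configured with Elemental):

     Mat C;
     PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
*/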
2929 /* ----------------------------------------------- */
2930 #if defined(PETSC_HAVE_ELEMENTAL)
2931 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2932 {
2933   PetscFunctionBegin;
2934   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2935   C->ops->productsymbolic = MatProductSymbolic_AB;
2936   PetscFunctionReturn(0);
2937 }
2938 #endif
2939 
2940 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2941 {
2942   Mat_Product *product = C->product;
2943   Mat         A = product->A,B=product->B;
2944 
2945   PetscFunctionBegin;
2946   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2947     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2948   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2949   C->ops->productsymbolic = MatProductSymbolic_AtB;
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2954 {
2955   Mat_Product    *product = C->product;
2956   const char     *algTypes[2] = {"allgatherv","cyclic"};
2957   PetscInt       alg,nalg = 2;
2958   PetscBool      flg = PETSC_FALSE;
2959 
2960   PetscFunctionBegin;
2961   /* Set default algorithm */
2962   alg = 0; /* default is allgatherv */
2963   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2964   if (flg) {
2965     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2966   }
2967 
2968   /* Get runtime option */
2969   if (product->api_user) {
2970     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2971     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2972     PetscOptionsEnd();
2973   } else {
2974     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2975     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2976     PetscOptionsEnd();
2977   }
2978   if (flg) {
2979     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2980   }
2981 
2982   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2983   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2984   PetscFunctionReturn(0);
2985 }
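
/*
   MatProduct interface sketch for C = A*B^T (illustrative):

     Mat C;
     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C,"cyclic"));  // or "allgatherv" (the default)
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/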
2986 
2987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2988 {
2989   Mat_Product    *product = C->product;
2990 
2991   PetscFunctionBegin;
2992   switch (product->type) {
2993 #if defined(PETSC_HAVE_ELEMENTAL)
2994   case MATPRODUCT_AB:
2995     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2996     break;
2997 #endif
2998   case MATPRODUCT_AtB:
2999     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3000     break;
3001   case MATPRODUCT_ABt:
3002     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3003     break;
3004   default:
3005     break;
3006   }
3007   PetscFunctionReturn(0);
3008 }
3009