xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 8fb5bd83c3955fefcf33a54e3bb66920a9fa884b)

/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix
              that represents the locally owned portion of the operator; for a
              MATSEQDENSE matrix, returns the matrix itself.

    Input Parameter:
.      A - the sequential or parallel dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

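/*
   Usage sketch (illustrative only, not part of the library source): obtain the local
   block of an assembled MATMPIDENSE matrix A and query its dimensions. The returned
   matrix is a view that shares storage with A, so the caller must not destroy it.

     Mat      Aloc;
     PetscInt m,N;

     PetscCall(MatDenseGetLocalMatrix(A,&Aloc));
     PetscCall(MatGetSize(Aloc,&m,&N));   // local rows of A by global columns of A
*/
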
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
  PetscScalar    *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  rend2 = PetscMin(rend,A->cmap->N);
  if (rend2>rstart) {
    for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
    PetscCall(PetscLogFlops(rend2-rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A,&v));
  PetscFunctionReturn(0);
}

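/*
   Note on the indexing above (explanatory): the local block stores each global column j
   contiguously with leading dimension lda, so the entry at local row r and global
   column j lives at v[r+j*lda]. The diagonal entry of global row j sits in local row
   j-rstart, hence v[j-rstart+j*lda].

     // e.g. with rstart=4 and lda=3 (3 local rows), the diagonal entry of global
     // row 5 is v[(5-4)+5*3] = v[16]
*/
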
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

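/*
   Usage sketch (illustrative only): this type-specific routine is dispatched from the
   public MatGetDiagonalBlock(). For a square MATMPIDENSE matrix A it yields the
   rstart:rend by rstart:rend block owned by this rank as a sequential dense view that
   shares storage with A and is cached on A, so the caller must not destroy it.

     Mat D;

     PetscCall(MatGetDiagonalBlock(A,&D));
*/
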
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

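/*
   Note on the value layout (explanatory): with the default row-oriented option the
   input block v[] is read row by row, v[i*n+j] being the entry for (idxm[i],idxn[j]);
   in column-oriented mode (MatSetOption(mat,MAT_ROW_ORIENTED,PETSC_FALSE)) the entry
   is read from v[i+j*m] instead, which is why the two branches above index v
   differently.

     // a minimal sketch: insert a 2x2 block in the default row-major order
     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};   // row 0: 1 2, row 1: 3 4
     PetscCall(MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES));
*/
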
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! We should really check each index
     set to confirm that it is OK. Currently this supports only a submatrix with the same
     partitioning as the original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}

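/*
   Usage sketch (illustrative only): this routine is intended to be reached through the
   public MatCreateSubMatrix(). Because no redistribution is done, the row IS must
   select only locally owned rows with the same partitioning as A.

     IS       isrow,iscol;
     Mat      S;
     PetscInt rstart,rend;

     PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow)); // all locally owned rows
     PetscCall(ISCreateStride(PETSC_COMM_WORLD,ncols,first,1,&iscol));        // ncols, first: caller-chosen
     PetscCall(MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S));
*/
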
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

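/*
   Usage sketch (illustrative only): zero a set of global rows, put `diag` on the
   diagonal of the zeroed rows, and fix up the right-hand side b so that those rows
   still enforce the solution values held in x.

     PetscInt rows[] = {0,7};   // global row indices (hypothetical)

     PetscCall(MatZeroRows(A,2,rows,1.0,x,b));   // b[rows[i]] becomes 1.0*x[rows[i]]
*/
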
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}

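/*
   Note on the parallel product (explanatory): y = A*x is computed as a gather of the
   entire x into the local work vector lvec via the PetscSF broadcast, followed by a
   purely local product with this rank's (n_local by N) block; conceptually

     y_local = A_local * [x_0; x_1; ...; x_{p-1}]

   No reduction is needed afterwards, since each rank owns complete rows of A.
*/
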
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined (PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAReplaceArray_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) PetscCall(MatView_Dense_Binary(mat,viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

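/*
   Usage sketch (illustrative only): query global storage statistics through the
   public interface.

     MatInfo info;

     PetscCall(MatGetInfo(A,MAT_GLOBAL_SUM,&info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"nz used %g, memory %g\n",(double)info.nz_used,(double)info.memory));
*/
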
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

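/*
   Note on the scaling (explanatory): MatDiagonalScale(A,ll,rr) overwrites A with

     diag(ll) * A * diag(rr)

   The left scaling multiplies each locally owned row i by ll[i] directly, while the
   right scaling first broadcasts rr into lvec so every rank holds the factor for
   each of the N global columns it stores.
*/
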
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v    = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

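/*
   Note on the reductions above (explanatory): since each rank owns full rows,
     NORM_FROBENIUS sums |a_ij|^2 locally and takes sqrt of the globally summed value,
     NORM_1 (max column sum) must allreduce all N partial column sums before taking
       the max, and
     NORM_INFINITY (max row sum) only needs a max over the per-rank row norms.
*/
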
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

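/*
   Note on the NORM_2 handling above (explanatory): the local column norms are squared,
   summed across ranks, and square-rooted, since for the per-rank pieces c_p of a
   column c we have ||c||_2 = sqrt(sum_p ||c_p||_2^2); the MEAN reductions divide the
   global column sums by the number of rows m. A minimal usage sketch through the
   public interface (norms sized to the global number of columns, here N, assumed known):

     PetscReal *norms;
     PetscCall(PetscMalloc1(N,&norms));
     PetscCall(MatGetColumnNorms(A,NORM_2,norms));
*/
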
#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A,bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift                   = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift                   = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat,bind));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

1327 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1328 {
1329   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1330 
1331   PetscFunctionBegin;
1332   PetscCall(MatSetRandom(d->A,rctx));
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1337 {
1338   PetscFunctionBegin;
1339   *missing = PETSC_FALSE;
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1344 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1345 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1346 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1347 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1348 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1349 
1350 /* -------------------------------------------------------------------*/
1351 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1352                                         MatGetRow_MPIDense,
1353                                         MatRestoreRow_MPIDense,
1354                                         MatMult_MPIDense,
1355                                 /*  4*/ MatMultAdd_MPIDense,
1356                                         MatMultTranspose_MPIDense,
1357                                         MatMultTransposeAdd_MPIDense,
1358                                         NULL,
1359                                         NULL,
1360                                         NULL,
1361                                 /* 10*/ NULL,
1362                                         NULL,
1363                                         NULL,
1364                                         NULL,
1365                                         MatTranspose_MPIDense,
1366                                 /* 15*/ MatGetInfo_MPIDense,
1367                                         MatEqual_MPIDense,
1368                                         MatGetDiagonal_MPIDense,
1369                                         MatDiagonalScale_MPIDense,
1370                                         MatNorm_MPIDense,
1371                                 /* 20*/ MatAssemblyBegin_MPIDense,
1372                                         MatAssemblyEnd_MPIDense,
1373                                         MatSetOption_MPIDense,
1374                                         MatZeroEntries_MPIDense,
1375                                 /* 24*/ MatZeroRows_MPIDense,
1376                                         NULL,
1377                                         NULL,
1378                                         NULL,
1379                                         NULL,
1380                                 /* 29*/ MatSetUp_MPIDense,
1381                                         NULL,
1382                                         NULL,
1383                                         MatGetDiagonalBlock_MPIDense,
1384                                         NULL,
1385                                 /* 34*/ MatDuplicate_MPIDense,
1386                                         NULL,
1387                                         NULL,
1388                                         NULL,
1389                                         NULL,
1390                                 /* 39*/ MatAXPY_MPIDense,
1391                                         MatCreateSubMatrices_MPIDense,
1392                                         NULL,
1393                                         MatGetValues_MPIDense,
1394                                         MatCopy_MPIDense,
1395                                 /* 44*/ NULL,
1396                                         MatScale_MPIDense,
1397                                         MatShift_MPIDense,
1398                                         NULL,
1399                                         NULL,
1400                                 /* 49*/ MatSetRandom_MPIDense,
1401                                         NULL,
1402                                         NULL,
1403                                         NULL,
1404                                         NULL,
1405                                 /* 54*/ NULL,
1406                                         NULL,
1407                                         NULL,
1408                                         NULL,
1409                                         NULL,
1410                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1411                                         MatDestroy_MPIDense,
1412                                         MatView_MPIDense,
1413                                         NULL,
1414                                         NULL,
1415                                 /* 64*/ NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         NULL,
1419                                         NULL,
1420                                 /* 69*/ NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /* 74*/ NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /* 79*/ NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                 /* 83*/ MatLoad_MPIDense,
1435                                         NULL,
1436                                         NULL,
1437                                         NULL,
1438                                         NULL,
1439                                         NULL,
1440                                 /* 89*/ NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                 /* 94*/ NULL,
1446                                         NULL,
1447                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1448                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1449                                         NULL,
1450                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1451                                         NULL,
1452                                         NULL,
1453                                         MatConjugate_MPIDense,
1454                                         NULL,
1455                                 /*104*/ NULL,
1456                                         MatRealPart_MPIDense,
1457                                         MatImaginaryPart_MPIDense,
1458                                         NULL,
1459                                         NULL,
1460                                 /*109*/ NULL,
1461                                         NULL,
1462                                         NULL,
1463                                         MatGetColumnVector_MPIDense,
1464                                         MatMissingDiagonal_MPIDense,
1465                                 /*114*/ NULL,
1466                                         NULL,
1467                                         NULL,
1468                                         NULL,
1469                                         NULL,
1470                                 /*119*/ NULL,
1471                                         NULL,
1472                                         NULL,
1473                                         NULL,
1474                                         NULL,
1475                                 /*124*/ NULL,
1476                                         MatGetColumnReductions_MPIDense,
1477                                         NULL,
1478                                         NULL,
1479                                         NULL,
1480                                 /*129*/ NULL,
1481                                         NULL,
1482                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1483                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1484                                         NULL,
1485                                 /*134*/ NULL,
1486                                         NULL,
1487                                         NULL,
1488                                         NULL,
1489                                         NULL,
1490                                 /*139*/ NULL,
1491                                         NULL,
1492                                         NULL,
1493                                         NULL,
1494                                         NULL,
1495                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1496                                 /*145*/ NULL,
1497                                         NULL,
1498                                         NULL
1499 };
1500 
1501 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1502 {
1503   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1504   PetscBool      iscuda;
1505 
1506   PetscFunctionBegin;
1507   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1508   PetscCall(PetscLayoutSetUp(mat->rmap));
1509   PetscCall(PetscLayoutSetUp(mat->cmap));
1510   if (!a->A) {
1511     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1512     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1513     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1514   }
1515   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1516   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1517   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1518   mat->preallocated = PETSC_TRUE;
1519   mat->assembled = PETSC_TRUE;
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1524 {
1525   Mat            B,C;
1526 
1527   PetscFunctionBegin;
1528   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1529   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1530   PetscCall(MatDestroy(&C));
1531   if (reuse == MAT_REUSE_MATRIX) {
1532     C = *newmat;
1533   } else C = NULL;
1534   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1535   PetscCall(MatDestroy(&B));
1536   if (reuse == MAT_INPLACE_MATRIX) {
1537     PetscCall(MatHeaderReplace(A,&C));
1538   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1543 {
1544   Mat            B,C;
1545 
1546   PetscFunctionBegin;
1547   PetscCall(MatDenseGetLocalMatrix(A,&C));
1548   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1549   if (reuse == MAT_REUSE_MATRIX) {
1550     C = *newmat;
1551   } else C = NULL;
1552   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1553   PetscCall(MatDestroy(&B));
1554   if (reuse == MAT_INPLACE_MATRIX) {
1555     PetscCall(MatHeaderReplace(A,&C));
1556   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1557   PetscFunctionReturn(0);
1558 }
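
/*
   Usage sketch (illustrative): the two converters above are normally reached
   through MatConvert(). Assuming A is a hypothetical assembled MATMPIAIJ matrix:

     Mat Ad;
     PetscCall(MatConvert(A,MATMPIDENSE,MAT_INITIAL_MATRIX,&Ad)); // AIJ -> dense
     PetscCall(MatConvert(Ad,MATMPIAIJ,MAT_INPLACE_MATRIX,&Ad));  // dense -> AIJ, replacing Ad
     PetscCall(MatDestroy(&Ad));
*/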
1559 
1560 #if defined(PETSC_HAVE_ELEMENTAL)
1561 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1562 {
1563   Mat            mat_elemental;
1564   PetscScalar    *v;
1565   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1566 
1567   PetscFunctionBegin;
1568   if (reuse == MAT_REUSE_MATRIX) {
1569     mat_elemental = *newmat;
1570     PetscCall(MatZeroEntries(*newmat));
1571   } else {
1572     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1573     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1574     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1575     PetscCall(MatSetUp(mat_elemental));
1576     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1577   }
1578 
1579   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1580   for (i=0; i<N; i++) cols[i] = i;
1581   for (i=0; i<m; i++) rows[i] = rstart + i;
1582 
1583   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1584   PetscCall(MatDenseGetArray(A,&v));
1585   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1586   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1587   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1588   PetscCall(MatDenseRestoreArray(A,&v));
1589   PetscCall(PetscFree2(rows,cols));
1590 
1591   if (reuse == MAT_INPLACE_MATRIX) {
1592     PetscCall(MatHeaderReplace(A,&mat_elemental));
1593   } else {
1594     *newmat = mat_elemental;
1595   }
1596   PetscFunctionReturn(0);
1597 }
1598 #endif
1599 
1600 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1601 {
1602   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1603 
1604   PetscFunctionBegin;
1605   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1610 {
1611   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1612 
1613   PetscFunctionBegin;
1614   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1619 {
1620   Mat_MPIDense   *mat;
1621   PetscInt       m,nloc,N;
1622 
1623   PetscFunctionBegin;
1624   PetscCall(MatGetSize(inmat,&m,&N));
1625   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1626   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1627     PetscInt sum;
1628 
1629     if (n == PETSC_DECIDE) {
1630       PetscCall(PetscSplitOwnership(comm,&n,&N));
1631     }
1632     /* Check sum(n) = N */
1633     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1634     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1635 
1636     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1637     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1638   }
1639 
1640   /* numeric phase */
1641   mat = (Mat_MPIDense*)(*outmat)->data;
1642   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1643   PetscFunctionReturn(0);
1644 }
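
/*
   Usage sketch (illustrative): the public entry point for the routine above is
   MatCreateMPIMatConcatenateSeqMat(); each rank contributes its sequential
   block of rows. seq below is a hypothetical local MATSEQDENSE matrix.

     Mat mpi;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpi));
     PetscCall(MatDestroy(&mpi));
*/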
1645 
1646 #if defined(PETSC_HAVE_CUDA)
1647 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1648 {
1649   Mat            B;
1650   Mat_MPIDense   *m;
1651 
1652   PetscFunctionBegin;
1653   if (reuse == MAT_INITIAL_MATRIX) {
1654     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1655   } else if (reuse == MAT_REUSE_MATRIX) {
1656     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1657   }
1658 
1659   B    = *newmat;
1660   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1661   PetscCall(PetscFree(B->defaultvectype));
1662   PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1663   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1664   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1666   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1667   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1669   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1670   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1672   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1673   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1674   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1675   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1676   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1677   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1678   m    = (Mat_MPIDense*)(B)->data;
1679   if (m->A) {
1680     PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1681   }
1682   B->ops->bindtocpu = NULL;
1683   B->offloadmask    = PETSC_OFFLOAD_CPU;
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1688 {
1689   Mat            B;
1690   Mat_MPIDense   *m;
1691 
1692   PetscFunctionBegin;
1693   if (reuse == MAT_INITIAL_MATRIX) {
1694     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1695   } else if (reuse == MAT_REUSE_MATRIX) {
1696     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1697   }
1698 
1699   B    = *newmat;
1700   PetscCall(PetscFree(B->defaultvectype));
1701   PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1702   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1703   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1704   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1705   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1706   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1707   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1708   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1709   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1710   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1711   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1712   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1713   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1714   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1715   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1716   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1717   m    = (Mat_MPIDense*)(B->data);
1718   if (m->A) {
1719     PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1720     B->offloadmask = PETSC_OFFLOAD_BOTH;
1721   } else {
1722     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1723   }
1724   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1725 
1726   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1727   PetscFunctionReturn(0);
1728 }
1729 #endif
1730 
1731 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1732 {
1733   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1734   PetscInt       lda;
1735 
1736   PetscFunctionBegin;
1737   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1738   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1739   if (!a->cvec) {
1740     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1741   }
1742   a->vecinuse = col + 1;
1743   PetscCall(MatDenseGetLDA(a->A,&lda));
1744   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1745   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1746   *v   = a->cvec;
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1751 {
1752   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1753 
1754   PetscFunctionBegin;
1755   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1756   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1757   a->vecinuse = 0;
1758   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1759   PetscCall(VecResetArray(a->cvec));
1760   *v   = NULL;
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1765 {
1766   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1767   PetscInt       lda;
1768 
1769   PetscFunctionBegin;
1770   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1771   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1772   if (!a->cvec) {
1773     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1774   }
1775   a->vecinuse = col + 1;
1776   PetscCall(MatDenseGetLDA(a->A,&lda));
1777   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1778   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1779   PetscCall(VecLockReadPush(a->cvec));
1780   *v   = a->cvec;
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1785 {
1786   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1787 
1788   PetscFunctionBegin;
1789   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
1790   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1791   a->vecinuse = 0;
1792   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1793   PetscCall(VecLockReadPop(a->cvec));
1794   PetscCall(VecResetArray(a->cvec));
1795   *v   = NULL;
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1800 {
1801   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1802   PetscInt       lda;
1803 
1804   PetscFunctionBegin;
1805   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1806   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1807   if (!a->cvec) {
1808     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1809   }
1810   a->vecinuse = col + 1;
1811   PetscCall(MatDenseGetLDA(a->A,&lda));
1812   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1813   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1814   *v   = a->cvec;
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1819 {
1820   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1821 
1822   PetscFunctionBegin;
1823   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
1824   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1825   a->vecinuse = 0;
1826   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1827   PetscCall(VecResetArray(a->cvec));
1828   *v   = NULL;
1829   PetscFunctionReturn(0);
1830 }
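
/*
   Usage sketch for the column-vector accessors above (illustrative; A is a
   hypothetical assembled MATMPIDENSE matrix). Each Get must be paired with
   the matching Restore before another column is requested, as enforced by
   the vecinuse checks.

     Vec       col;
     PetscReal nrm;
     PetscCall(MatDenseGetColumnVecRead(A,0,&col)); // read-only view of column 0
     PetscCall(VecNorm(col,NORM_2,&nrm));
     PetscCall(MatDenseRestoreColumnVecRead(A,0,&col));
*/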
1831 
1832 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v)
1833 {
1834   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1835   Mat_MPIDense *c;
1836   MPI_Comm     comm;
1837   PetscInt     pbegin, pend;
1838 
1839   PetscFunctionBegin;
1840   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1841   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1842   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1843   pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart);
1844   pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart));
1845   if (!a->cmat) {
1846     PetscCall(MatCreate(comm,&a->cmat));
1847     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1848     PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
1849     if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1850     else {
1851       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
1852       PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
1853       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1854     }
1855     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1856     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1857   } else {
1858     PetscBool same = (PetscBool)(rend-rbegin == a->cmat->rmap->N);
1859     if (same && a->cmat->rmap->N != A->rmap->N) {
1860       same = (PetscBool)(pend-pbegin == a->cmat->rmap->n);
1861       PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1862     }
1863     if (!same) {
1864       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1865       PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap));
1866       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
1867       PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
1868       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1869     }
1870     if (cend-cbegin != a->cmat->cmap->N) {
1871       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1872       PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
1873       PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1874       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1875     }
1876   }
1877   c = (Mat_MPIDense*)a->cmat->data;
1878   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1879   PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A));
1880   a->cmat->preallocated = PETSC_TRUE;
1881   a->cmat->assembled = PETSC_TRUE;
1882   a->matinuse = cbegin + 1;
1883   *v = a->cmat;
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1888 {
1889   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1890   Mat_MPIDense *c;
1891 
1892   PetscFunctionBegin;
1893   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1894   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1895   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1896   a->matinuse = 0;
1897   c = (Mat_MPIDense*)a->cmat->data;
1898   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1899   *v = NULL;
1900   PetscFunctionReturn(0);
1901 }
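
/*
   Usage sketch for the submatrix accessors above (illustrative; A is a
   hypothetical assembled MATMPIDENSE matrix with at least 4 global rows and
   2 columns). The view aliases A's storage and must be restored before
   another view is requested, as enforced by the matinuse checks.

     Mat S;
     PetscCall(MatDenseGetSubMatrix(A,0,4,0,2,&S)); // rows [0,4), columns [0,2)
     PetscCall(MatScale(S,2.0));                    // scales the viewed block of A
     PetscCall(MatDenseRestoreSubMatrix(A,&S));
*/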
1902 
1903 /*MC
1904    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1905 
1906    Options Database Keys:
1907 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1908 
1909   Level: beginner
1910 
1911 .seealso: `MatCreateDense()`
1912 
1913 M*/
1914 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1915 {
1916   Mat_MPIDense   *a;
1917 
1918   PetscFunctionBegin;
1919   PetscCall(PetscNewLog(mat,&a));
1920   mat->data = (void*)a;
1921   PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1922 
1923   mat->insertmode = NOT_SET_VALUES;
1924 
1925   /* build cache for off array entries formed */
1926   a->donotstash = PETSC_FALSE;
1927 
1928   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1929 
1930   /* stuff used for matrix vector multiply */
1931   a->lvec        = NULL;
1932   a->Mvctx       = NULL;
1933   a->roworiented = PETSC_TRUE;
1934 
1935   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1936   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1937   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1938   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1939   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1940   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1941   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1942   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1943   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1944   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1945   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1946   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1947   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1948   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1949   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1950   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1951   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1952   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1953   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1954   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1955   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1956 #if defined(PETSC_HAVE_ELEMENTAL)
1957   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1958 #endif
1959 #if defined(PETSC_HAVE_SCALAPACK)
1960   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1961 #endif
1962 #if defined(PETSC_HAVE_CUDA)
1963   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1964 #endif
1965   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1966   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1967   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1968 #if defined(PETSC_HAVE_CUDA)
1969   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1970   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1971 #endif
1972 
1973   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1974   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1975   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 /*MC
1980    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1981 
1982    Options Database Keys:
1983 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1984 
1985   Level: beginner
1986 
1987 .seealso: `MATMPIDENSE`, `MATSEQDENSECUDA`, `MATDENSECUDA`
1988 
1989 M*/
1990 #if defined(PETSC_HAVE_CUDA)
1991 #include <petsc/private/deviceimpl.h>
1992 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1993 {
1994   PetscFunctionBegin;
1995   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
1996   PetscCall(MatCreate_MPIDense(B));
1997   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
1998   PetscFunctionReturn(0);
1999 }
2000 #endif
2001 
2002 /*MC
2003    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2004 
2005    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2006    and MATMPIDENSE otherwise.
2007 
2008    Options Database Keys:
2009 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2010 
2011   Level: beginner
2012 
2013 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
2014 M*/
2015 
2016 /*MC
2017    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2018 
2019    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2020    and MATMPIDENSECUDA otherwise.
2021 
2022    Options Database Keys:
2023 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2024 
2025   Level: beginner
2026 
2027 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
2028 M*/
2029 
2030 /*@C
2031    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2032 
2033    Collective
2034 
2035    Input Parameters:
2036 +  B - the matrix
2037 -  data - optional location of matrix data.  Set data=NULL for PETSc
2038    to control all matrix memory allocation.
2039 
2040    Notes:
2041    The dense format is fully compatible with standard Fortran 77
2042    storage by columns.
2043 
2044    The data input variable is intended primarily for Fortran programmers
2045    who wish to allocate their own matrix memory space.  Most users should
2046    set data=NULL.
2047 
2048    Level: intermediate
2049 
2050 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2051 @*/
2052 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2053 {
2054   PetscFunctionBegin;
2055   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2056   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
2057   PetscFunctionReturn(0);
2058 }
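
/*
   Example (a minimal sketch): most callers pass data = NULL and let PETSc
   allocate the storage; the sizes below are illustrative.

     Mat B;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&B));
     PetscCall(MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,100,100));
     PetscCall(MatSetType(B,MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(B,NULL)); // PETSc-managed storage
*/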
2059 
2060 /*@
2061    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2062    array provided by the user. This is useful to avoid copying an array
2063    into a matrix.
2064 
2065    Not Collective
2066 
2067    Input Parameters:
2068 +  mat - the matrix
2069 -  array - the array in column major order
2070 
2071    Notes:
2072    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2073    freed when the matrix is destroyed.
2074 
2075    Level: developer
2076 
2077 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2078 
2079 @*/
2080 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2081 {
2082   PetscFunctionBegin;
2083   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2084   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2085   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2086 #if defined(PETSC_HAVE_CUDA)
2087   mat->offloadmask = PETSC_OFFLOAD_CPU;
2088 #endif
2089   PetscFunctionReturn(0);
2090 }
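
/*
   Usage sketch (illustrative): temporarily present a caller-owned, column-major
   buffer as the matrix storage, then restore the original array. myarray and
   its size are hypothetical; it must hold at least lda*n local entries.

     PetscCall(MatDensePlaceArray(A,myarray)); // A now wraps myarray
     // ... operate on A ...
     PetscCall(MatDenseResetArray(A));         // A uses its original storage again
     // myarray is still owned (and eventually freed) by the caller
*/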
2091 
2092 /*@
2093    MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()
2094 
2095    Not Collective
2096 
2097    Input Parameter:
2098 .  mat - the matrix
2099 
2100    Notes:
2101    You can only call this after a call to MatDensePlaceArray()
2102 
2103    Level: developer
2104 
2105 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2106 
2107 @*/
2108 PetscErrorCode  MatDenseResetArray(Mat mat)
2109 {
2110   PetscFunctionBegin;
2111   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2112   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
2113   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 /*@
2118    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2119    array provided by the user. This is useful to avoid copying an array
2120    into a matrix; unlike MatDensePlaceArray(), the matrix takes ownership of the new array.
2121 
2122    Not Collective
2123 
2124    Input Parameters:
2125 +  mat - the matrix
2126 -  array - the array in column major order
2127 
2128    Notes:
2129    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2130    freed by the user. It will be freed when the matrix is destroyed.
2131 
2132    Level: developer
2133 
2134 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2135 @*/
2136 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2137 {
2138   PetscFunctionBegin;
2139   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2140   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2141   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2142 #if defined(PETSC_HAVE_CUDA)
2143   mat->offloadmask = PETSC_OFFLOAD_CPU;
2144 #endif
2145   PetscFunctionReturn(0);
2146 }
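
/*
   Usage sketch (illustrative): the replacement array must come from
   PetscMalloc() because the matrix takes ownership and frees it when it is
   destroyed. lda and n are hypothetical local leading dimension and column
   count matching A.

     PetscScalar *newarr;
     PetscCall(PetscMalloc1(lda*n,&newarr));
     PetscCall(MatDenseReplaceArray(A,newarr)); // A owns newarr from here on
     // do NOT call PetscFree(newarr)
*/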
2147 
2148 #if defined(PETSC_HAVE_CUDA)
2149 /*@C
2150    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2151    array provided by the user. This is useful to avoid copying an array
2152    into a matrix
2153 
2154    Not Collective
2155 
2156    Input Parameters:
2157 +  mat - the matrix
2158 -  array - the array in column major order
2159 
2160    Notes:
2161    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2162    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2163 
2164    Level: developer
2165 
2166 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2167 @*/
2168 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2169 {
2170   PetscFunctionBegin;
2171   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2172   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2173   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2174   mat->offloadmask = PETSC_OFFLOAD_GPU;
2175   PetscFunctionReturn(0);
2176 }
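
/*
   Usage sketch (illustrative; assumes a CUDA build): the placed pointer must
   be device memory obtained with cudaMalloc(). d_buf, lda and n are
   hypothetical; PetscCallCUDA checks the CUDA return code.

     PetscScalar *d_buf;
     PetscCallCUDA(cudaMalloc((void**)&d_buf,lda*n*sizeof(PetscScalar)));
     PetscCall(MatDenseCUDAPlaceArray(A,d_buf));
     // ... operate on A ...
     PetscCall(MatDenseCUDAResetArray(A));
     PetscCallCUDA(cudaFree(d_buf)); // still owned by the caller
*/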
2177 
2178 /*@C
2179    MatDenseCUDAResetArray - Resets the matrix array to that which it previously had before the call to MatDenseCUDAPlaceArray()
2180 
2181    Not Collective
2182 
2183    Input Parameter:
2184 .  mat - the matrix
2185 
2186    Notes:
2187    You can only call this after a call to MatDenseCUDAPlaceArray()
2188 
2189    Level: developer
2190 
2191 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2192 
2193 @*/
2194 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2195 {
2196   PetscFunctionBegin;
2197   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2198   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2199   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 /*@C
2204    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2205    array provided by the user. This is useful to avoid copying an array
2206    into a matrix
2207 
2208    Not Collective
2209 
2210    Input Parameters:
2211 +  mat - the matrix
2212 -  array - the array in column major order
2213 
2214    Notes:
2215    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2216    The memory passed in CANNOT be freed by the user. It will be freed
2217    when the matrix is destroyed. The array should respect the matrix leading dimension.
2218 
2219    Level: developer
2220 
2221 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2222 @*/
2223 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2224 {
2225   PetscFunctionBegin;
2226   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2227   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2228   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2229   mat->offloadmask = PETSC_OFFLOAD_GPU;
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 /*@C
2234    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2235 
2236    Not Collective
2237 
2238    Input Parameter:
2239 .  A - the matrix
2240 
2241    Output Parameter:
2242 .  array - the GPU array in column major order
2243 
2244    Notes:
2245    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2246 
2247    Level: developer
2248 
2249 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2250 @*/
2251 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2252 {
2253   PetscFunctionBegin;
2254   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2255   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2256   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 /*@C
2261    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2262 
2263    Not Collective
2264 
2265    Input Parameters:
2266 +  A - the matrix
2267 -  array - the GPU array in column major order
2268 
2271    Level: developer
2272 
2273 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2274 @*/
2275 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2276 {
2277   PetscFunctionBegin;
2278   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2279   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2280   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2281   A->offloadmask = PETSC_OFFLOAD_GPU;
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 /*@C
2286    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2287 
2288    Not Collective
2289 
2290    Input Parameter:
2291 .  A - the matrix
2292 
2293    Output Parameter:
2294 .  array - the GPU array in column major order
2295 
2296    Notes:
2297    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2298 
2299    Level: developer
2300 
2301 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2302 @*/
2303 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2304 {
2305   PetscFunctionBegin;
2306   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2307   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 /*@C
2312    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2313 
2314    Not Collective
2315 
2316    Input Parameters:
2317 +  A - the matrix
2318 -  array - the GPU array in column major order
2319 
2320    Notes:
2321    The array must have been obtained with a call to MatDenseCUDAGetArrayRead().
2322 
2323    Level: developer
2324 
2325 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2326 @*/
2327 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2328 {
2329   PetscFunctionBegin;
       PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2330   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 /*@C
2335    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2336 
2337    Not Collective
2338 
2339    Input Parameter:
2340 .  A - the matrix
2341 
2342    Output Parameter:
2343 .  array - the GPU array in column major order
2344 
2345    Notes:
2346    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2347 
2348    Level: developer
2349 
2350 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2351 @*/
2352 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2353 {
2354   PetscFunctionBegin;
2355   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2356   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2357   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 /*@C
2362    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2363 
2364    Not Collective
2365 
2366    Input Parameters:
2367 +  A - the matrix
2368 -  array - the GPU array in column major order
2369 
2372    Level: developer
2373 
2374 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2375 @*/
2376 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2377 {
2378   PetscFunctionBegin;
2379   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2380   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2381   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2382   A->offloadmask = PETSC_OFFLOAD_GPU;
2383   PetscFunctionReturn(0);
2384 }
2385 #endif
2386 
2387 /*@C
2388    MatCreateDense - Creates a matrix in dense format.
2389 
2390    Collective
2391 
2392    Input Parameters:
2393 +  comm - MPI communicator
2394 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2395 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2396 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2397 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2398 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2399    to control all matrix memory allocation.
2400 
2401    Output Parameter:
2402 .  A - the matrix
2403 
2404    Notes:
2405    The dense format is fully compatible with standard Fortran 77
2406    storage by columns.
2407    Note that, although local portions of the matrix are stored in column-major
2408    order, the matrix is partitioned across MPI ranks by row.
2409 
2410    The data input variable is intended primarily for Fortran programmers
2411    who wish to allocate their own matrix memory space.  Most users should
2412    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2413 
2414    The user MUST specify either the local or global matrix dimensions
2415    (possibly both).
2416 
2417    Level: intermediate
2418 
2419 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2420 @*/
2421 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2422 {
2423   PetscFunctionBegin;
2424   PetscCall(MatCreate(comm,A));
2425   PetscCall(MatSetSizes(*A,m,n,M,N));
2426   PetscCall(MatSetType(*A,MATDENSE));
2427   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2428   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2429   PetscFunctionReturn(0);
2430 }
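
/*
   Example (a minimal sketch of the call above): create a 100 x 50 dense matrix
   with PETSc-managed storage and a PETSc-chosen row distribution.

     Mat A;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,NULL,&A));
     PetscCall(MatZeroEntries(A));
     PetscCall(MatDestroy(&A));
*/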
2431 
2432 #if defined(PETSC_HAVE_CUDA)
2433 /*@C
2434    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2435 
2436    Collective
2437 
2438    Input Parameters:
2439 +  comm - MPI communicator
2440 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2441 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2442 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2443 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2444 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2445    to control matrix memory allocation.
2446 
2447    Output Parameter:
2448 .  A - the matrix
2449 
2452    Level: intermediate
2453 
2454 .seealso: `MatCreate()`, `MatCreateDense()`
2455 @*/
2456 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2457 {
2458   PetscFunctionBegin;
2459   PetscCall(MatCreate(comm,A));
2460   PetscValidLogicalCollectiveBool(*A,!!data,6);
2461   PetscCall(MatSetSizes(*A,m,n,M,N));
2462   PetscCall(MatSetType(*A,MATDENSECUDA));
2463   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2464   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2465   PetscFunctionReturn(0);
2466 }
2467 #endif
2468 
2469 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2470 {
2471   Mat            mat;
2472   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2473 
2474   PetscFunctionBegin;
2475   *newmat = NULL;
2476   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2477   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2478   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2479   a       = (Mat_MPIDense*)mat->data;
2480 
2481   mat->factortype   = A->factortype;
2482   mat->assembled    = PETSC_TRUE;
2483   mat->preallocated = PETSC_TRUE;
2484 
2485   mat->insertmode = NOT_SET_VALUES;
2486   a->donotstash   = oldmat->donotstash;
2487 
2488   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2489   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2490 
2491   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2492   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2493 
2494   *newmat = mat;
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2499 {
2500   PetscBool      isbinary;
2501 #if defined(PETSC_HAVE_HDF5)
2502   PetscBool      ishdf5;
2503 #endif
2504 
2505   PetscFunctionBegin;
2506   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2507   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2508   /* force binary viewer to load .info file if it has not yet done so */
2509   PetscCall(PetscViewerSetUp(viewer));
2510   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2511 #if defined(PETSC_HAVE_HDF5)
2512   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2513 #endif
2514   if (isbinary) {
2515     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2516 #if defined(PETSC_HAVE_HDF5)
2517   } else if (ishdf5) {
2518     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2519 #endif
2520   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2521   PetscFunctionReturn(0);
2522 }
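
/*
   Usage sketch (illustrative): load a dense matrix from a PETSc binary file;
   the file name is hypothetical.

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetType(A,MATMPIDENSE));
     PetscCall(MatLoad(A,viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/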
2523 
2524 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2525 {
2526   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2527   Mat            a,b;
2528 
2529   PetscFunctionBegin;
2530   a    = matA->A;
2531   b    = matB->A;
2532   PetscCall(MatEqual(a,b,flag));
2533   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2538 {
2539   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2540 
2541   PetscFunctionBegin;
2542   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2543   PetscCall(MatDestroy(&atb->atb));
2544   PetscCall(PetscFree(atb));
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2549 {
2550   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2551 
2552   PetscFunctionBegin;
2553   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2554   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2555   PetscCall(PetscFree(abt));
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2560 {
2561   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2562   Mat_TransMatMultDense *atb;
2563   MPI_Comm              comm;
2564   PetscMPIInt           size,*recvcounts;
2565   PetscScalar           *carray,*sendbuf;
2566   const PetscScalar     *atbarray;
2567   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2568   const PetscInt        *ranges;
2569 
2570   PetscFunctionBegin;
2571   MatCheckProduct(C,3);
2572   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2573   atb = (Mat_TransMatMultDense *)C->product->data;
2574   recvcounts = atb->recvcounts;
2575   sendbuf = atb->sendbuf;
2576 
2577   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2578   PetscCallMPI(MPI_Comm_size(comm,&size));
2579 
2580   /* compute atb->atb = aseq^T * bseq (this process's local contribution) */
2581   PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2582 
2583   PetscCall(MatGetOwnershipRanges(C,&ranges));
2584 
2585   /* pack atbarray into sendbuf, grouping the rows by their owning process */
2586   PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
2587   PetscCall(MatDenseGetLDA(atb->atb,&lda));
2588   for (proc=0, k=0; proc<size; proc++) {
2589     for (j=0; j<cN; j++) {
2590       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2591     }
2592   }
2593   PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2594 
2595   /* sum the per-process contributions in sendbuf into the local rows of C */
2596   PetscCall(MatDenseGetArrayWrite(c->A,&carray));
2597   PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2598   PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
2599   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(0);
2602 }
2603 
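/*
   The collective used above, in isolation: every rank holds a full-size
   partial result, and a single MPI_Reduce_scatter both sums the partials
   and delivers to each rank only the block it owns. A minimal standalone
   sketch (hypothetical sizes; N assumed divisible by size):

     MPI_Comm    comm = PETSC_COMM_WORLD;
     PetscMPIInt size,rank;
     PetscInt    i,N = 8;
     PetscScalar *partial,*mine;
     PetscMPIInt *counts;

     PetscCallMPI(MPI_Comm_size(comm,&size));
     PetscCallMPI(MPI_Comm_rank(comm,&rank));
     PetscCall(PetscMalloc3(N,&partial,N/size,&mine,size,&counts));
     for (i=0; i<size; i++) counts[i] = (PetscMPIInt)(N/size); // entries owned per rank
     for (i=0; i<N; i++) partial[i] = (PetscScalar)rank;       // this rank's contribution
     PetscCallMPI(MPI_Reduce_scatter(partial,mine,counts,MPIU_SCALAR,MPIU_SUM,comm));
     // every entry of mine now equals 0+1+...+(size-1) = size*(size-1)/2
     PetscCall(PetscFree3(partial,mine,counts));
*/
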
2604 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2605 {
2606   MPI_Comm              comm;
2607   PetscMPIInt           size;
2608   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2609   Mat_TransMatMultDense *atb;
2610   PetscBool             cisdense;
2611   PetscInt              i;
2612   const PetscInt        *ranges;
2613 
2614   PetscFunctionBegin;
2615   MatCheckProduct(C,3);
2616   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2617   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2618   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2619     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2620   }
2621 
2622   /* create matrix product C */
2623   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2624   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2625   if (!cisdense) {
2626     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2627   }
2628   PetscCall(MatSetUp(C));
2629 
2630   /* create data structure for reusing C */
2631   PetscCallMPI(MPI_Comm_size(comm,&size));
2632   PetscCall(PetscNew(&atb));
2633   cM   = C->rmap->N;
2634   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2635   PetscCall(MatGetOwnershipRanges(C,&ranges));
2636   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2637 
2638   C->product->data    = atb;
2639   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2640   PetscFunctionReturn(0);
2641 }
2642 
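/*
   Sketch of the user-level entry point for the symbolic/numeric pair above;
   A and B must share the same row layout, as checked in the symbolic phase:

     Mat C;
     PetscCall(MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     // later calls can reuse the buffers stashed in C->product:
     PetscCall(MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
*/
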
2643 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2644 {
2645   MPI_Comm              comm;
2646   PetscMPIInt           i, size;
2647   PetscInt              maxRows, bufsiz;
2648   PetscMPIInt           tag;
2649   PetscInt              alg;
2650   Mat_MatTransMultDense *abt;
2651   Mat_Product           *product = C->product;
2652   PetscBool             flg;
2653 
2654   PetscFunctionBegin;
2655   MatCheckProduct(C,4);
2656   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2657   /* check local size of A and B */
2658   PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);
2659 
2660   PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
2661   alg  = flg ? 0 : 1;
2662 
2663   /* setup matrix product C */
2664   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
2665   PetscCall(MatSetType(C,MATMPIDENSE));
2666   PetscCall(MatSetUp(C));
2667   PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));
2668 
2669   /* create data structure for reusing C */
2670   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2671   PetscCallMPI(MPI_Comm_size(comm,&size));
2672   PetscCall(PetscNew(&abt));
2673   abt->tag = tag;
2674   abt->alg = alg;
2675   switch (alg) {
2676   case 1: /* alg: "cyclic" */
2677     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2678     bufsiz = A->cmap->N * maxRows;
2679     PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
2680     break;
2681   default: /* alg: "allgatherv" */
2682     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2683     PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
2684     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2685     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2686     break;
2687   }
2688 
2689   C->product->data    = abt;
2690   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2691   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2692   PetscFunctionReturn(0);
2693 }
2694 
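/*
   Sketch of the corresponding user-level call. With alg 1 ("cyclic") only
   two buffers of A->cmap->N * maxRows entries are kept for the ring
   exchange; with alg 0 ("allgatherv") the transpose of all of B is gathered
   on every rank:

     Mat C;
     PetscCall(MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
*/
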
2695 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2696 {
2697   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2698   Mat_MatTransMultDense *abt;
2699   MPI_Comm              comm;
2700   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2701   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2702   PetscInt              i,cK=A->cmap->N,k,j,bn;
2703   PetscScalar           _DOne=1.0,_DZero=0.0;
2704   const PetscScalar     *av,*bv;
2705   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2706   MPI_Request           reqs[2];
2707   const PetscInt        *ranges;
2708 
2709   PetscFunctionBegin;
2710   MatCheckProduct(C,3);
2711   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2712   abt  = (Mat_MatTransMultDense*)C->product->data;
2713   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2714   PetscCallMPI(MPI_Comm_rank(comm,&rank));
2715   PetscCallMPI(MPI_Comm_size(comm,&size));
2716   PetscCall(MatDenseGetArrayRead(a->A,&av));
2717   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2718   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2719   PetscCall(MatDenseGetLDA(a->A,&i));
2720   PetscCall(PetscBLASIntCast(i,&alda));
2721   PetscCall(MatDenseGetLDA(b->A,&i));
2722   PetscCall(PetscBLASIntCast(i,&blda));
2723   PetscCall(MatDenseGetLDA(c->A,&i));
2724   PetscCall(PetscBLASIntCast(i,&clda));
2725   PetscCall(MatGetOwnershipRanges(B,&ranges));
2726   bn   = B->rmap->n;
2727   if (blda == bn) {
2728     sendbuf = (PetscScalar*)bv;
2729   } else {
2730     sendbuf = abt->buf[0];
2731     for (k = 0, i = 0; i < cK; i++) {
2732       for (j = 0; j < bn; j++, k++) {
2733         sendbuf[k] = bv[i * blda + j];
2734       }
2735     }
2736   }
2737   if (size > 1) {
2738     sendto = (rank + size - 1) % size;
2739     recvfrom = (rank + size + 1) % size;
2740   } else {
2741     sendto = recvfrom = 0;
2742   }
2743   PetscCall(PetscBLASIntCast(cK,&ck));
2744   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2745   recvisfrom = rank;
2746   for (i = 0; i < size; i++) {
2747     /* the sends and receives from the previous iteration have completed, so both buffers can be read/modified */
2748     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process from which the next recvbuf will originate */
2749     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2750 
2751     if (nextrecvisfrom != rank) {
2752       /* start the cyclic shift: post the send of sendbuf and the receive into recvbuf (which becomes the next sendbuf) */
2753       sendsiz = cK * bn;
2754       recvsiz = cK * nextbn;
2755       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2756       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2757       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2758     }
2759 
2760     /* local aseq * sendbuf^T */
2761     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2762     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2763 
2764     if (nextrecvisfrom != rank) {
2765       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2766       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2767     }
2768     bn = nextbn;
2769     recvisfrom = nextrecvisfrom;
2770     sendbuf = recvbuf;
2771   }
2772   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2773   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2774   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2775   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2776   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2777   PetscFunctionReturn(0);
2778 }
2779 
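/*
   The ring schedule above, reduced to its skeleton: in each step a rank
   multiplies against the block currently held in one buffer while shipping
   that block to rank-1 and receiving the next from rank+1, so the transfers
   hide behind the local GEMM. A sketch under stated assumptions (equal
   block sizes; compute_with() is a hypothetical stand-in for the
   BLASgemm_() call; rank, size, count, tag, comm, buf0, buf1 given):

     PetscMPIInt left = (rank + size - 1) % size, right = (rank + 1) % size;
     PetscScalar *cur = buf0, *next = buf1, *tmp;
     PetscMPIInt step;

     for (step = 0; step < size; step++) {
       MPI_Request reqs[2];
       PetscBool   more = (PetscBool)(step + 1 < size);
       if (more) {
         PetscCallMPI(MPI_Isend(cur, count, MPIU_SCALAR, left,  tag, comm, &reqs[0]));
         PetscCallMPI(MPI_Irecv(next, count, MPIU_SCALAR, right, tag, comm, &reqs[1]));
       }
       compute_with(cur);                                  // local multiply on the current block
       if (more) {
         PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
         tmp = cur; cur = next; next = tmp;                // the received block becomes the next operand
       }
     }
*/
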
2780 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2781 {
2782   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2783   Mat_MatTransMultDense *abt;
2784   MPI_Comm              comm;
2785   PetscMPIInt           size;
2786   PetscScalar           *cv, *sendbuf, *recvbuf;
2787   const PetscScalar     *av,*bv;
2788   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2789   PetscScalar           _DOne=1.0,_DZero=0.0;
2790   PetscBLASInt          cm, cn, ck, alda, clda;
2791 
2792   PetscFunctionBegin;
2793   MatCheckProduct(C,3);
2794   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2795   abt  = (Mat_MatTransMultDense*)C->product->data;
2796   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2797   PetscCallMPI(MPI_Comm_size(comm,&size));
2798   PetscCall(MatDenseGetArrayRead(a->A,&av));
2799   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2800   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2801   PetscCall(MatDenseGetLDA(a->A,&i));
2802   PetscCall(PetscBLASIntCast(i,&alda));
2803   PetscCall(MatDenseGetLDA(b->A,&blda));
2804   PetscCall(MatDenseGetLDA(c->A,&i));
2805   PetscCall(PetscBLASIntCast(i,&clda));
2806   /* copy transpose of B into buf[0] */
2807   bn      = B->rmap->n;
2808   sendbuf = abt->buf[0];
2809   recvbuf = abt->buf[1];
2810   for (k = 0, j = 0; j < bn; j++) {
2811     for (i = 0; i < cK; i++, k++) {
2812       sendbuf[k] = bv[i * blda + j];
2813     }
2814   }
2815   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2816   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2817   PetscCall(PetscBLASIntCast(cK,&ck));
2818   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2819   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2820   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2821   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2822   /* bv was already restored before the MPI_Allgatherv() call above */
2823   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2824   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2825   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2826   PetscFunctionReturn(0);
2827 }
2828 
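/*
   Operand shapes in the routine above: B_loc (bn x cK, leading dimension
   blda) is packed transposed, so sendbuf holds B_loc^T with leading
   dimension cK, and after MPI_Allgatherv recvbuf holds the full B^T in the
   same layout. The single GEMM then computes the owned rows of C directly:

     C_loc = A_loc * B^T,  i.e.  cv (cm x cn) = av (cm x ck) * recvbuf (ck x cn)
*/
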
2829 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2830 {
2831   Mat_MatTransMultDense *abt;
2832 
2833   PetscFunctionBegin;
2834   MatCheckProduct(C,3);
2835   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2836   abt = (Mat_MatTransMultDense*)C->product->data;
2837   switch (abt->alg) {
2838   case 1:
2839     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2840     break;
2841   default:
2842     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2843     break;
2844   }
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2849 {
2850   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2851 
2852   PetscFunctionBegin;
2853   PetscCall(MatDestroy(&ab->Ce));
2854   PetscCall(MatDestroy(&ab->Ae));
2855   PetscCall(MatDestroy(&ab->Be));
2856   PetscCall(PetscFree(ab));
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 #if defined(PETSC_HAVE_ELEMENTAL)
2861 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2862 {
2863   Mat_MatMultDense *ab;
2864 
2865   PetscFunctionBegin;
2866   MatCheckProduct(C,3);
2867   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2868   ab   = (Mat_MatMultDense*)C->product->data;
2869   PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2870   PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2871   PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2872   PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2877 {
2878   Mat              Ae,Be,Ce;
2879   Mat_MatMultDense *ab;
2880 
2881   PetscFunctionBegin;
2882   MatCheckProduct(C,4);
2883   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2884   /* check local size of A and B */
2885   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2886     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2887   }
2888 
2889   /* create elemental matrices Ae and Be */
2890   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2891   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2892   PetscCall(MatSetType(Ae,MATELEMENTAL));
2893   PetscCall(MatSetUp(Ae));
2894   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2895 
2896   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2897   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2898   PetscCall(MatSetType(Be,MATELEMENTAL));
2899   PetscCall(MatSetUp(Be));
2900   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2901 
2902   /* compute symbolic Ce = Ae*Be */
2903   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2904   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2905 
2906   /* setup C */
2907   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2908   PetscCall(MatSetType(C,MATDENSE));
2909   PetscCall(MatSetUp(C));
2910 
2911   /* create data structure for reusing C */
2912   PetscCall(PetscNew(&ab));
2913   ab->Ae = Ae;
2914   ab->Be = Be;
2915   ab->Ce = Ce;
2916 
2917   C->product->data    = ab;
2918   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2919   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2920   PetscFunctionReturn(0);
2921 }
2922 #endif
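
/*
   Sketch of the user-level call that reaches the Elemental-backed kernels
   above (only available when PETSc is configured with Elemental):

     Mat C;
     PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
*/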
2923 /* ----------------------------------------------- */
2924 #if defined(PETSC_HAVE_ELEMENTAL)
2925 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2926 {
2927   PetscFunctionBegin;
2928   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2929   C->ops->productsymbolic = MatProductSymbolic_AB;
2930   PetscFunctionReturn(0);
2931 }
2932 #endif
2933 
2934 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2935 {
2936   Mat_Product *product = C->product;
2937   Mat         A = product->A,B=product->B;
2938 
2939   PetscFunctionBegin;
2940   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2941     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2942   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2943   C->ops->productsymbolic = MatProductSymbolic_AtB;
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2948 {
2949   Mat_Product    *product = C->product;
2950   const char     *algTypes[2] = {"allgatherv","cyclic"};
2951   PetscInt       alg,nalg = 2;
2952   PetscBool      flg = PETSC_FALSE;
2953 
2954   PetscFunctionBegin;
2955   /* Set default algorithm */
2956   alg = 0; /* default is allgatherv */
2957   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2958   if (flg) {
2959     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2960   }
2961 
2962   /* Get runtime option */
2963   if (product->api_user) {
2964     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2965     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2966     PetscOptionsEnd();
2967   } else {
2968     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2969     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2970     PetscOptionsEnd();
2971   }
2972   if (flg) {
2973     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2974   }
2975 
2976   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2977   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2978   PetscFunctionReturn(0);
2979 }
2980 
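/*
   Illustrative run-time selection of the ABt algorithm, matching the two
   option paths above ("app" is a placeholder executable):

     ./app -matmattransmult_mpidense_mpidense_via cyclic    (MatMatTransposeMult() callers)
     ./app -mat_product_algorithm cyclic                    (MatProduct API callers)
*/
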
2981 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2982 {
2983   Mat_Product    *product = C->product;
2984 
2985   PetscFunctionBegin;
2986   switch (product->type) {
2987 #if defined(PETSC_HAVE_ELEMENTAL)
2988   case MATPRODUCT_AB:
2989     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2990     break;
2991 #endif
2992   case MATPRODUCT_AtB:
2993     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2994     break;
2995   case MATPRODUCT_ABt:
2996     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2997     break;
2998   default:
2999     break;
3000   }
3001   PetscFunctionReturn(0);
3002 }
3003
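/*
   Sketch of the generic MatProduct sequence that dispatches through
   MatProductSetFromOptions_MPIDense() above:

     Mat C;
     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_AtB));
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
*/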