xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 0dc45c67dd8cb6796abcc61ff7db4f0ecf01d6ed)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
13               matrix that holds the locally owned part of the operator. For a sequential dense matrix it returns the matrix itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
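    Example Usage:
    A minimal sketch (assumes a dense matrix A that has already been created and assembled):
.vb
      Mat lA;
      PetscCall(MatDenseGetLocalMatrix(A,&lA));
      /* lA holds the locally owned rows of A and can be used like any MATSEQDENSE matrix */
.ve
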
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
56   PetscScalar    *v;
57 
58   PetscFunctionBegin;
59   PetscCall(MatDenseGetArray(mat->A,&v));
60   PetscCall(MatDenseGetLDA(mat->A,&lda));
61   rend2 = PetscMin(rend,A->cmap->N);
62   if (rend2>rstart) {
63     for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
64     PetscCall(PetscLogFlops(rend2-rstart));
65   }
66   PetscCall(MatDenseRestoreArray(mat->A,&v));
67   PetscFunctionReturn(0);
68 }
69 
70 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
71 {
72   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
73   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
74 
75   PetscFunctionBegin;
76   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
77   lrow = row - rstart;
78   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
83 {
84   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
85   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
86 
87   PetscFunctionBegin;
88   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
89   lrow = row - rstart;
90   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
91   PetscFunctionReturn(0);
92 }
93 
94 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
95 {
96   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
97   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
98   PetscScalar    *array;
99   MPI_Comm       comm;
100   PetscBool      flg;
101   Mat            B;
102 
103   PetscFunctionBegin;
104   PetscCall(MatHasCongruentLayouts(A,&flg));
105   PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices with congruent row and column layouts are supported");
106   PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
107   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
108 
109     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
110     PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
111     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
112     PetscCall(MatCreate(comm,&B));
113     PetscCall(MatSetSizes(B,m,m,m,m));
114     PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
115     PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
116     PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
117     PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
118     PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
119     *a   = B;
120     PetscCall(MatDestroy(&B));
121   } else *a = B;
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
126 {
127   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
128   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
129   PetscBool      roworiented = A->roworiented;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue;
134     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       if (roworiented) {
138         PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
139       } else {
140         for (j=0; j<n; j++) {
141           if (idxn[j] < 0) continue;
142           PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
143           PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
144         }
145       }
146     } else if (!A->donotstash) {
147       mat->assembled = PETSC_FALSE;
148       if (roworiented) {
149         PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
150       } else {
151         PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
152       }
153     }
154   }
155   PetscFunctionReturn(0);
156 }
157 
158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
159 {
160   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
161   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
162 
163   PetscFunctionBegin;
164   for (i=0; i<m; i++) {
165     if (idxm[i] < 0) continue; /* negative row */
166     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
167     if (idxm[i] >= rstart && idxm[i] < rend) {
168       row = idxm[i] - rstart;
169       for (j=0; j<n; j++) {
170         if (idxn[j] < 0) continue; /* negative column */
171         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
172         PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
173       }
174     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
180 {
181   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
182 
183   PetscFunctionBegin;
184   PetscCall(MatDenseGetLDA(a->A,lda));
185   PetscFunctionReturn(0);
186 }
187 
188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
189 {
190   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
191   PetscBool      iscuda;
192 
193   PetscFunctionBegin;
194   if (!a->A) {
195     PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
196     PetscCall(PetscLayoutSetUp(A->rmap));
197     PetscCall(PetscLayoutSetUp(A->cmap));
198     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
199     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
200     PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
201     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
202     PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
203   }
204   PetscCall(MatDenseSetLDA(a->A,lda));
205   PetscFunctionReturn(0);
206 }
207 
208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211 
212   PetscFunctionBegin;
213   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
214   PetscCall(MatDenseGetArray(a->A,array));
215   PetscFunctionReturn(0);
216 }
217 
218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
219 {
220   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
221 
222   PetscFunctionBegin;
223   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
224   PetscCall(MatDenseGetArrayRead(a->A,array));
225   PetscFunctionReturn(0);
226 }
227 
228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
229 {
230   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
231 
232   PetscFunctionBegin;
233   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
234   PetscCall(MatDenseGetArrayWrite(a->A,array));
235   PetscFunctionReturn(0);
236 }
237 
238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
244   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
245   PetscCall(MatDensePlaceArray(a->A,array));
246   PetscFunctionReturn(0);
247 }
248 
249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
250 {
251   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
252 
253   PetscFunctionBegin;
254   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
255   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
256   PetscCall(MatDenseResetArray(a->A));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
261 {
262   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
263 
264   PetscFunctionBegin;
265   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
266   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
267   PetscCall(MatDenseReplaceArray(a->A,array));
268   PetscFunctionReturn(0);
269 }
270 
271 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
272 {
273   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
274   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
275   const PetscInt    *irow,*icol;
276   const PetscScalar *v;
277   PetscScalar       *bv;
278   Mat               newmat;
279   IS                iscol_local;
280   MPI_Comm          comm_is,comm_mat;
281 
282   PetscFunctionBegin;
283   PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
284   PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
285   PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
286 
287   PetscCall(ISAllGather(iscol,&iscol_local));
288   PetscCall(ISGetIndices(isrow,&irow));
289   PetscCall(ISGetIndices(iscol_local,&icol));
290   PetscCall(ISGetLocalSize(isrow,&nrows));
291   PetscCall(ISGetLocalSize(iscol,&ncols));
292   PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */
293 
294   /* No parallel redistribution is currently supported! Each index set should really be checked
295      to confirm that it is compatible. Currently the submatrix must use the same row partitioning
296      as the original matrix; a commented sketch of such a check follows. */
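  /* A commented sketch (not enabled here) of the kind of per-rank check described above,
     assuming isrow is required to stay within this rank's ownership range:

       PetscInt rmin,rmax;
       PetscCall(ISGetMinMax(isrow,&rmin,&rmax));
       PetscCheck(rmin >= A->rmap->rstart && rmax < A->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Row index set must match the row partitioning of the original matrix");
  */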
297 
298   PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
299   PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
300 
301   /* Check submatrix call */
302   if (scall == MAT_REUSE_MATRIX) {
303     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
304     /* The row and column sizes of the reused matrix really should be tested here; a commented sketch follows. */
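    /* A commented sketch (not enabled here) of that size test, assuming the reused matrix must
       have the same local sizes as a freshly created submatrix:

         PetscInt Bm,Bn;
         PetscCall(MatGetLocalSize(*B,&Bm,&Bn));
         PetscCheck(Bm == nrows && Bn == ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size");
    */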
305     newmat = *B;
306   } else {
307     /* Create and fill new matrix */
308     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
309     PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
310     PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
311     PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
312   }
313 
314   /* Now extract the data pointers and do the copy, column at a time */
315   newmatd = (Mat_MPIDense*)newmat->data;
316   PetscCall(MatDenseGetArray(newmatd->A,&bv));
317   PetscCall(MatDenseGetArrayRead(mat->A,&v));
318   PetscCall(MatDenseGetLDA(mat->A,&lda));
319   for (i=0; i<Ncols; i++) {
320     const PetscScalar *av = v + lda*icol[i];
321     for (j=0; j<nrows; j++) {
322       *bv++ = av[irow[j] - rstart];
323     }
324   }
325   PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
326   PetscCall(MatDenseRestoreArray(newmatd->A,&bv));
327 
328   /* Assemble the matrices so that the correct flags are set */
329   PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
330   PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
331 
332   /* Free work space */
333   PetscCall(ISRestoreIndices(isrow,&irow));
334   PetscCall(ISRestoreIndices(iscol_local,&icol));
335   PetscCall(ISDestroy(&iscol_local));
336   *B   = newmat;
337   PetscFunctionReturn(0);
338 }
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343 
344   PetscFunctionBegin;
345   PetscCall(MatDenseRestoreArray(a->A,array));
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352 
353   PetscFunctionBegin;
354   PetscCall(MatDenseRestoreArrayRead(a->A,array));
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
359 {
360   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
364   PetscFunctionReturn(0);
365 }
366 
367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
368 {
369   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
370   PetscInt       nstash,reallocs;
371 
372   PetscFunctionBegin;
373   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
374 
375   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
376   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
377   PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
378   PetscFunctionReturn(0);
379 }
380 
381 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
382 {
383   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
384   PetscInt       i,*row,*col,flg,j,rstart,ncols;
385   PetscMPIInt    n;
386   PetscScalar    *val;
387 
388   PetscFunctionBegin;
389   if (!mdn->donotstash && !mat->nooffprocentries) {
390     /*  wait on receives */
391     while (1) {
392       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
393       if (!flg) break;
394 
395       for (i=0; i<n;) {
396         /* Now identify the consecutive vals belonging to the same row */
397         for (j=i,rstart=row[j]; j<n; j++) {
398           if (row[j] != rstart) break;
399         }
400         if (j < n) ncols = j-i;
401         else       ncols = n-i;
402         /* Now assemble all these values with a single function call */
403         PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
404         i    = j;
405       }
406     }
407     PetscCall(MatStashScatterEnd_Private(&mat->stash));
408   }
409 
410   PetscCall(MatAssemblyBegin(mdn->A,mode));
411   PetscCall(MatAssemblyEnd(mdn->A,mode));
412   PetscFunctionReturn(0);
413 }
414 
415 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
416 {
417   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
418 
419   PetscFunctionBegin;
420   PetscCall(MatZeroEntries(l->A));
421   PetscFunctionReturn(0);
422 }
423 
424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
425 {
426   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
427   PetscInt          i,len,*lrows;
428 
429   PetscFunctionBegin;
430   /* get locally owned rows */
431   PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
432   /* fix right hand side if needed */
433   if (x && b) {
434     const PetscScalar *xx;
435     PetscScalar       *bb;
436 
437     PetscCall(VecGetArrayRead(x, &xx));
438     PetscCall(VecGetArrayWrite(b, &bb));
439     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
440     PetscCall(VecRestoreArrayRead(x, &xx));
441     PetscCall(VecRestoreArrayWrite(b, &bb));
442   }
443   PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
444   if (diag != 0.0) {
445     Vec d;
446 
447     PetscCall(MatCreateVecs(A,NULL,&d));
448     PetscCall(VecSet(d,diag));
449     PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
450     PetscCall(VecDestroy(&d));
451   }
452   PetscCall(PetscFree(lrows));
453   PetscFunctionReturn(0);
454 }
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
462 {
463   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
464   const PetscScalar *ax;
465   PetscScalar       *ay;
466   PetscMemType      axmtype,aymtype;
467 
468   PetscFunctionBegin;
469   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
470   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
471   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
472   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
473   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
474   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
475   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
476   PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
477   PetscFunctionReturn(0);
478 }
479 
480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
483   const PetscScalar *ax;
484   PetscScalar       *ay;
485   PetscMemType      axmtype,aymtype;
486 
487   PetscFunctionBegin;
488   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
489   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
490   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
491   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
492   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
493   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
494   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
495   PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
496   PetscFunctionReturn(0);
497 }
498 
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
502   const PetscScalar *ax;
503   PetscScalar       *ay;
504   PetscMemType      axmtype,aymtype;
505 
506   PetscFunctionBegin;
507   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
508   PetscCall(VecSet(yy,0.0));
509   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
510   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
511   PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
512   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
513   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
514   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
515   PetscCall(VecRestoreArrayAndMemType(yy,&ay));
516   PetscFunctionReturn(0);
517 }
518 
519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
520 {
521   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
522   const PetscScalar *ax;
523   PetscScalar       *ay;
524   PetscMemType      axmtype,aymtype;
525 
526   PetscFunctionBegin;
527   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
528   PetscCall(VecCopy(yy,zz));
529   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
530   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
531   PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
532   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
533   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
534   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
535   PetscCall(VecRestoreArrayAndMemType(zz,&ay));
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
542   PetscInt          lda,len,i,n,radd;
543   PetscScalar       *x,zero = 0.0;
544   const PetscScalar *av;
545 
546   PetscFunctionBegin;
547   PetscCall(VecSet(v,zero));
548   PetscCall(VecGetArray(v,&x));
549   PetscCall(VecGetSize(v,&n));
550   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
551   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
552   PetscCall(MatDenseGetArrayRead(a->A,&av));
553   PetscCall(MatDenseGetLDA(a->A,&lda));
554   radd = A->rmap->rstart*lda; /* offset of the first column of the locally owned diagonal block; use lda, not the local row count, in case they differ */
555   for (i=0; i<len; i++) {
556     x[i] = av[radd + i*lda + i];
557   }
558   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
559   PetscCall(VecRestoreArray(v,&x));
560   PetscFunctionReturn(0);
561 }
562 
563 PetscErrorCode MatDestroy_MPIDense(Mat mat)
564 {
565   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
566 
567   PetscFunctionBegin;
568 #if defined(PETSC_USE_LOG)
569   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
570 #endif
571   PetscCall(MatStashDestroy_Private(&mat->stash));
572   PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
573   PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
574   PetscCall(MatDestroy(&mdn->A));
575   PetscCall(VecDestroy(&mdn->lvec));
576   PetscCall(PetscSFDestroy(&mdn->Mvctx));
577   PetscCall(VecDestroy(&mdn->cvec));
578   PetscCall(MatDestroy(&mdn->cmat));
579 
580   PetscCall(PetscFree(mat->data));
581   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
582 
583   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
584   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
585   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
586   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
587   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
588   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
589   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
590   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
591   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
592   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
593   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
594   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
596 #if defined(PETSC_HAVE_ELEMENTAL)
597   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
598 #endif
599 #if defined(PETSC_HAVE_SCALAPACK)
600   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
601 #endif
602   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
603   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
604   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
605 #if defined (PETSC_HAVE_CUDA)
606   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
607   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
608   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",NULL));
609   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidensecuda_mpidense_C",NULL));
610   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
611   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
612   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
613   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
614   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArray_C",NULL));
615   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayRead_C",NULL));
616   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayWrite_C",NULL));
617   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArray_C",NULL));
618   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayRead_C",NULL));
619   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayWrite_C",NULL));
620   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAPlaceArray_C",NULL));
621   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAResetArray_C",NULL));
622   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAReplaceArray_C",NULL));
623 #endif
624   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
625   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
626   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
627   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
628   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
629   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
630   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
631   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
632   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
633   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
634 
635   PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL));
636   PetscFunctionReturn(0);
637 }
638 
639 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
640 
641 #include <petscdraw.h>
642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
643 {
644   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
645   PetscMPIInt       rank;
646   PetscViewerType   vtype;
647   PetscBool         iascii,isdraw;
648   PetscViewer       sviewer;
649   PetscViewerFormat format;
650 
651   PetscFunctionBegin;
652   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
653   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
654   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
655   if (iascii) {
656     PetscCall(PetscViewerGetType(viewer,&vtype));
657     PetscCall(PetscViewerGetFormat(viewer,&format));
658     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
659       MatInfo info;
660       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
661       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
662       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
663       PetscCall(PetscViewerFlush(viewer));
664       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
665       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
666       PetscFunctionReturn(0);
667     } else if (format == PETSC_VIEWER_ASCII_INFO) {
668       PetscFunctionReturn(0);
669     }
670   } else if (isdraw) {
671     PetscDraw draw;
672     PetscBool isnull;
673 
674     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
675     PetscCall(PetscDrawIsNull(draw,&isnull));
676     if (isnull) PetscFunctionReturn(0);
677   }
678 
679   {
680     /* assemble the entire matrix onto the first process (rank 0). */
681     Mat         A;
682     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
683     PetscInt    *cols;
684     PetscScalar *vals;
685 
686     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
687     if (rank == 0) {
688       PetscCall(MatSetSizes(A,M,N,M,N));
689     } else {
690       PetscCall(MatSetSizes(A,0,0,M,N));
691     }
692     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
693     PetscCall(MatSetType(A,MATMPIDENSE));
694     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
695     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
696 
697     /* Copy the matrix. This is not the most efficient approach,
698        but it is simple and quick for now */
699     A->insertmode = INSERT_VALUES;
700 
701     row = mat->rmap->rstart;
702     m   = mdn->A->rmap->n;
703     for (i=0; i<m; i++) {
704       PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
705       PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
706       PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
707       row++;
708     }
709 
710     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
711     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
712     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
713     if (rank == 0) {
714       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
715       PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
716     }
717     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
718     PetscCall(PetscViewerFlush(viewer));
719     PetscCall(MatDestroy(&A));
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
725 {
726   PetscBool      iascii,isbinary,isdraw,issocket;
727 
728   PetscFunctionBegin;
729   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
730   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
731   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
732   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
733 
734   if (iascii || issocket || isdraw) {
735     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
736   } else if (isbinary) PetscCall(MatView_Dense_Binary(mat,viewer));
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
741 {
742   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
743   Mat            mdn  = mat->A;
744   PetscLogDouble isend[5],irecv[5];
745 
746   PetscFunctionBegin;
747   info->block_size = 1.0;
748 
749   PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));
750 
751   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
752   isend[3] = info->memory;  isend[4] = info->mallocs;
753   if (flag == MAT_LOCAL) {
754     info->nz_used      = isend[0];
755     info->nz_allocated = isend[1];
756     info->nz_unneeded  = isend[2];
757     info->memory       = isend[3];
758     info->mallocs      = isend[4];
759   } else if (flag == MAT_GLOBAL_MAX) {
760     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
761 
762     info->nz_used      = irecv[0];
763     info->nz_allocated = irecv[1];
764     info->nz_unneeded  = irecv[2];
765     info->memory       = irecv[3];
766     info->mallocs      = irecv[4];
767   } else if (flag == MAT_GLOBAL_SUM) {
768     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
769 
770     info->nz_used      = irecv[0];
771     info->nz_allocated = irecv[1];
772     info->nz_unneeded  = irecv[2];
773     info->memory       = irecv[3];
774     info->mallocs      = irecv[4];
775   }
776   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
777   info->fill_ratio_needed = 0;
778   info->factor_mallocs    = 0;
779   PetscFunctionReturn(0);
780 }
781 
782 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
783 {
784   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
785 
786   PetscFunctionBegin;
787   switch (op) {
788   case MAT_NEW_NONZERO_LOCATIONS:
789   case MAT_NEW_NONZERO_LOCATION_ERR:
790   case MAT_NEW_NONZERO_ALLOCATION_ERR:
791     MatCheckPreallocated(A,1);
792     PetscCall(MatSetOption(a->A,op,flg));
793     break;
794   case MAT_ROW_ORIENTED:
795     MatCheckPreallocated(A,1);
796     a->roworiented = flg;
797     PetscCall(MatSetOption(a->A,op,flg));
798     break;
799   case MAT_FORCE_DIAGONAL_ENTRIES:
800   case MAT_KEEP_NONZERO_PATTERN:
801   case MAT_USE_HASH_TABLE:
802   case MAT_SORTED_FULL:
803     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
804     break;
805   case MAT_IGNORE_OFF_PROC_ENTRIES:
806     a->donotstash = flg;
807     break;
808   case MAT_SYMMETRIC:
809   case MAT_STRUCTURALLY_SYMMETRIC:
810   case MAT_HERMITIAN:
811   case MAT_SYMMETRY_ETERNAL:
812   case MAT_IGNORE_LOWER_TRIANGULAR:
813   case MAT_IGNORE_ZERO_ENTRIES:
814     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
815     break;
816   default:
817     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
818   }
819   PetscFunctionReturn(0);
820 }
821 
822 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
823 {
824   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
825   const PetscScalar *l;
826   PetscScalar       x,*v,*vv,*r;
827   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
828 
829   PetscFunctionBegin;
830   PetscCall(MatDenseGetArray(mdn->A,&vv));
831   PetscCall(MatDenseGetLDA(mdn->A,&lda));
832   PetscCall(MatGetLocalSize(A,&s2,&s3));
833   if (ll) {
834     PetscCall(VecGetLocalSize(ll,&s2a));
835     PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
836     PetscCall(VecGetArrayRead(ll,&l));
837     for (i=0; i<m; i++) {
838       x = l[i];
839       v = vv + i;
840       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
841     }
842     PetscCall(VecRestoreArrayRead(ll,&l));
843     PetscCall(PetscLogFlops(1.0*n*m));
844   }
845   if (rr) {
846     const PetscScalar *ar;
847 
848     PetscCall(VecGetLocalSize(rr,&s3a));
849     PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
850     PetscCall(VecGetArrayRead(rr,&ar));
851     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
852     PetscCall(VecGetArray(mdn->lvec,&r));
853     PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
854     PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
855     PetscCall(VecRestoreArrayRead(rr,&ar));
856     for (i=0; i<n; i++) {
857       x = r[i];
858       v = vv + i*lda;
859       for (j=0; j<m; j++) (*v++) *= x;
860     }
861     PetscCall(VecRestoreArray(mdn->lvec,&r));
862     PetscCall(PetscLogFlops(1.0*n*m));
863   }
864   PetscCall(MatDenseRestoreArray(mdn->A,&vv));
865   PetscFunctionReturn(0);
866 }
867 
868 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
869 {
870   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
871   PetscInt          i,j;
872   PetscMPIInt       size;
873   PetscReal         sum = 0.0;
874   const PetscScalar *av,*v;
875 
876   PetscFunctionBegin;
877   PetscCall(MatDenseGetArrayRead(mdn->A,&av));
878   v    = av;
879   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
880   if (size == 1) {
881     PetscCall(MatNorm(mdn->A,type,nrm));
882   } else {
883     if (type == NORM_FROBENIUS) {
884       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
885         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
886       }
887       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
888       *nrm = PetscSqrtReal(*nrm);
889       PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
890     } else if (type == NORM_1) {
891       PetscReal *tmp,*tmp2;
892       PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
893       *nrm = 0.0;
894       v    = av;
895       for (j=0; j<mdn->A->cmap->n; j++) {
896         for (i=0; i<mdn->A->rmap->n; i++) {
897           tmp[j] += PetscAbsScalar(*v);  v++;
898         }
899       }
900       PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
901       for (j=0; j<A->cmap->N; j++) {
902         if (tmp2[j] > *nrm) *nrm = tmp2[j];
903       }
904       PetscCall(PetscFree2(tmp,tmp2));
905       PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
906     } else if (type == NORM_INFINITY) { /* max row norm */
907       PetscReal ntemp;
908       PetscCall(MatNorm(mdn->A,type,&ntemp));
909       PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
910     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
911   }
912   PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
913   PetscFunctionReturn(0);
914 }
915 
916 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
917 {
918   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
919   Mat            B;
920   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
921   PetscInt       j,i,lda;
922   PetscScalar    *v;
923 
924   PetscFunctionBegin;
925   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
926     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
927     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
928     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
929     PetscCall(MatMPIDenseSetPreallocation(B,NULL));
930   } else B = *matout;
931 
932   m    = a->A->rmap->n; n = a->A->cmap->n;
933   PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
934   PetscCall(MatDenseGetLDA(a->A,&lda));
935   PetscCall(PetscMalloc1(m,&rwork));
936   for (i=0; i<m; i++) rwork[i] = rstart + i;
937   for (j=0; j<n; j++) {
938     PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
939     v   += lda;
940   }
941   PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
942   PetscCall(PetscFree(rwork));
943   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
944   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
945   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
946     *matout = B;
947   } else {
948     PetscCall(MatHeaderMerge(A,&B));
949   }
950   PetscFunctionReturn(0);
951 }
952 
953 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
954 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
955 
956 PetscErrorCode MatSetUp_MPIDense(Mat A)
957 {
958   PetscFunctionBegin;
959   PetscCall(PetscLayoutSetUp(A->rmap));
960   PetscCall(PetscLayoutSetUp(A->cmap));
961   if (!A->preallocated) {
962     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
963   }
964   PetscFunctionReturn(0);
965 }
966 
967 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
968 {
969   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
970 
971   PetscFunctionBegin;
972   PetscCall(MatAXPY(A->A,alpha,B->A,str));
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatConjugate_MPIDense(Mat mat)
977 {
978   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
979 
980   PetscFunctionBegin;
981   PetscCall(MatConjugate(a->A));
982   PetscFunctionReturn(0);
983 }
984 
985 PetscErrorCode MatRealPart_MPIDense(Mat A)
986 {
987   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
988 
989   PetscFunctionBegin;
990   PetscCall(MatRealPart(a->A));
991   PetscFunctionReturn(0);
992 }
993 
994 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
995 {
996   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
997 
998   PetscFunctionBegin;
999   PetscCall(MatImaginaryPart(a->A));
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1004 {
1005   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1006 
1007   PetscFunctionBegin;
1008   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1009   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1010   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1015 
1016 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
1017 {
1018   PetscInt       i,m,n;
1019   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1020   PetscReal      *work;
1021 
1022   PetscFunctionBegin;
1023   PetscCall(MatGetSize(A,&m,&n));
1024   PetscCall(PetscMalloc1(n,&work));
1025   if (type == REDUCTION_MEAN_REALPART) {
1026     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
1027   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1028     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
1029   } else {
1030     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
1031   }
1032   if (type == NORM_2) {
1033     for (i=0; i<n; i++) work[i] *= work[i];
1034   }
1035   if (type == NORM_INFINITY) {
1036     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1037   } else {
1038     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1039   }
1040   PetscCall(PetscFree(work));
1041   if (type == NORM_2) {
1042     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1043   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1044     for (i=0; i<n; i++) reductions[i] /= m;
1045   }
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #if defined(PETSC_HAVE_CUDA)
1050 PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
1051 {
1052   PetscScalar *da;
1053   PetscInt    lda;
1054 
1055   PetscFunctionBegin;
1056   PetscCall(MatDenseCUDAGetArray(A,&da));
1057   PetscCall(MatDenseGetLDA(A,&lda));
1058   PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
1059   PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
1060   PetscCall(MatDenseCUDARestoreArray(A,&da));
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1065 {
1066   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1067   PetscInt       lda;
1068 
1069   PetscFunctionBegin;
1070   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1071   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1072   if (!a->cvec) {
1073     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1074     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1075   }
1076   a->vecinuse = col + 1;
1077   PetscCall(MatDenseGetLDA(a->A,&lda));
1078   PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1079   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1080   *v   = a->cvec;
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1085 {
1086   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1087 
1088   PetscFunctionBegin;
1089   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1090   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1091   a->vecinuse = 0;
1092   PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1093   PetscCall(VecCUDAResetArray(a->cvec));
1094   *v   = NULL;
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1099 {
1100   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1101   PetscInt       lda;
1102 
1103   PetscFunctionBegin;
1104   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1105   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1106   if (!a->cvec) {
1107     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1108     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1109   }
1110   a->vecinuse = col + 1;
1111   PetscCall(MatDenseGetLDA(a->A,&lda));
1112   PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
1113   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1114   PetscCall(VecLockReadPush(a->cvec));
1115   *v   = a->cvec;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1120 {
1121   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1122 
1123   PetscFunctionBegin;
1124   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1125   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1126   a->vecinuse = 0;
1127   PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
1128   PetscCall(VecLockReadPop(a->cvec));
1129   PetscCall(VecCUDAResetArray(a->cvec));
1130   *v   = NULL;
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1135 {
1136   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1137   PetscInt       lda;
1138 
1139   PetscFunctionBegin;
1140   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1141   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1142   if (!a->cvec) {
1143     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1144     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1145   }
1146   a->vecinuse = col + 1;
1147   PetscCall(MatDenseGetLDA(a->A,&lda));
1148   PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1149   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1150   *v   = a->cvec;
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1155 {
1156   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1157 
1158   PetscFunctionBegin;
1159   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1160   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1161   a->vecinuse = 0;
1162   PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1163   PetscCall(VecCUDAResetArray(a->cvec));
1164   *v   = NULL;
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1169 {
1170   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1171 
1172   PetscFunctionBegin;
1173   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1174   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1175   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1180 {
1181   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1182 
1183   PetscFunctionBegin;
1184   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1185   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1186   PetscCall(MatDenseCUDAResetArray(l->A));
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1191 {
1192   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1193 
1194   PetscFunctionBegin;
1195   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1196   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1197   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1202 {
1203   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1204 
1205   PetscFunctionBegin;
1206   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1211 {
1212   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1213 
1214   PetscFunctionBegin;
1215   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1220 {
1221   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1222 
1223   PetscFunctionBegin;
1224   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1229 {
1230   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1231 
1232   PetscFunctionBegin;
1233   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1238 {
1239   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1240 
1241   PetscFunctionBegin;
1242   PetscCall(MatDenseCUDAGetArray(l->A,a));
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1247 {
1248   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1249 
1250   PetscFunctionBegin;
1251   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1256 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1257 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1258 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1259 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1260 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1262 
1263 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1264 {
1265   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1266 
1267   PetscFunctionBegin;
1268   PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1269   PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1270   if (d->A) PetscCall(MatBindToCPU(d->A,bind));
1271   mat->boundtocpu = bind;
1272   if (!bind) {
1273     PetscBool iscuda;
1274 
1275     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
1276     if (!iscuda) {
1277       PetscCall(VecDestroy(&d->cvec));
1278     }
1279     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
1280     if (!iscuda) {
1281       PetscCall(MatDestroy(&d->cmat));
1282     }
1283     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
1284     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
1285     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
1286     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1287     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
1288     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1289     mat->ops->shift                   = MatShift_MPIDenseCUDA;
1290   } else {
1291     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1292     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1293     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1294     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1295     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1296     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1297     mat->ops->shift                   = MatShift_MPIDense;
1298   }
1299   if (d->cmat) PetscCall(MatBindToCPU(d->cmat,bind));
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1304 {
1305   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1306   PetscBool      iscuda;
1307 
1308   PetscFunctionBegin;
1309   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1310   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
1311   if (!iscuda) PetscFunctionReturn(0);
1312   PetscCall(PetscLayoutSetUp(A->rmap));
1313   PetscCall(PetscLayoutSetUp(A->cmap));
1314   if (!d->A) {
1315     PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
1316     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
1317     PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
1318   }
1319   PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
1320   PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
1321   A->preallocated = PETSC_TRUE;
1322   A->assembled = PETSC_TRUE;
1323   PetscFunctionReturn(0);
1324 }
1325 #endif
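/*
   Usage sketch (not part of the original source): MatBindToCPU() is the public entry point
   that reaches MatBindToCPU_MPIDenseCUDA() above for MATMPIDENSECUDA matrices; binding forces
   subsequent operations onto the host and unbinding re-enables GPU execution.

     PetscCall(MatBindToCPU(A,PETSC_TRUE));    // subsequent operations on A run on the CPU
     PetscCall(MatBindToCPU(A,PETSC_FALSE));   // allow GPU execution again
*/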
1326 
1327 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1328 {
1329   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1330 
1331   PetscFunctionBegin;
1332   PetscCall(MatSetRandom(d->A,rctx));
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1337 {
1338   PetscFunctionBegin;
1339   *missing = PETSC_FALSE; /* a dense matrix never has structurally missing diagonal entries */
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1344 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1345 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1346 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1347 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1348 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1349 
1350 /* -------------------------------------------------------------------*/
1351 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1352                                         MatGetRow_MPIDense,
1353                                         MatRestoreRow_MPIDense,
1354                                         MatMult_MPIDense,
1355                                 /*  4*/ MatMultAdd_MPIDense,
1356                                         MatMultTranspose_MPIDense,
1357                                         MatMultTransposeAdd_MPIDense,
1358                                         NULL,
1359                                         NULL,
1360                                         NULL,
1361                                 /* 10*/ NULL,
1362                                         NULL,
1363                                         NULL,
1364                                         NULL,
1365                                         MatTranspose_MPIDense,
1366                                 /* 15*/ MatGetInfo_MPIDense,
1367                                         MatEqual_MPIDense,
1368                                         MatGetDiagonal_MPIDense,
1369                                         MatDiagonalScale_MPIDense,
1370                                         MatNorm_MPIDense,
1371                                 /* 20*/ MatAssemblyBegin_MPIDense,
1372                                         MatAssemblyEnd_MPIDense,
1373                                         MatSetOption_MPIDense,
1374                                         MatZeroEntries_MPIDense,
1375                                 /* 24*/ MatZeroRows_MPIDense,
1376                                         NULL,
1377                                         NULL,
1378                                         NULL,
1379                                         NULL,
1380                                 /* 29*/ MatSetUp_MPIDense,
1381                                         NULL,
1382                                         NULL,
1383                                         MatGetDiagonalBlock_MPIDense,
1384                                         NULL,
1385                                 /* 34*/ MatDuplicate_MPIDense,
1386                                         NULL,
1387                                         NULL,
1388                                         NULL,
1389                                         NULL,
1390                                 /* 39*/ MatAXPY_MPIDense,
1391                                         MatCreateSubMatrices_MPIDense,
1392                                         NULL,
1393                                         MatGetValues_MPIDense,
1394                                         MatCopy_MPIDense,
1395                                 /* 44*/ NULL,
1396                                         MatScale_MPIDense,
1397                                         MatShift_MPIDense,
1398                                         NULL,
1399                                         NULL,
1400                                 /* 49*/ MatSetRandom_MPIDense,
1401                                         NULL,
1402                                         NULL,
1403                                         NULL,
1404                                         NULL,
1405                                 /* 54*/ NULL,
1406                                         NULL,
1407                                         NULL,
1408                                         NULL,
1409                                         NULL,
1410                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1411                                         MatDestroy_MPIDense,
1412                                         MatView_MPIDense,
1413                                         NULL,
1414                                         NULL,
1415                                 /* 64*/ NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         NULL,
1419                                         NULL,
1420                                 /* 69*/ NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /* 74*/ NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /* 79*/ NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                 /* 83*/ MatLoad_MPIDense,
1435                                         NULL,
1436                                         NULL,
1437                                         NULL,
1438                                         NULL,
1439                                         NULL,
1440                                 /* 89*/ NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                 /* 94*/ NULL,
1446                                         NULL,
1447                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1448                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1449                                         NULL,
1450                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1451                                         NULL,
1452                                         NULL,
1453                                         MatConjugate_MPIDense,
1454                                         NULL,
1455                                 /*104*/ NULL,
1456                                         MatRealPart_MPIDense,
1457                                         MatImaginaryPart_MPIDense,
1458                                         NULL,
1459                                         NULL,
1460                                 /*109*/ NULL,
1461                                         NULL,
1462                                         NULL,
1463                                         MatGetColumnVector_MPIDense,
1464                                         MatMissingDiagonal_MPIDense,
1465                                 /*114*/ NULL,
1466                                         NULL,
1467                                         NULL,
1468                                         NULL,
1469                                         NULL,
1470                                 /*119*/ NULL,
1471                                         NULL,
1472                                         NULL,
1473                                         NULL,
1474                                         NULL,
1475                                 /*124*/ NULL,
1476                                         MatGetColumnReductions_MPIDense,
1477                                         NULL,
1478                                         NULL,
1479                                         NULL,
1480                                 /*129*/ NULL,
1481                                         NULL,
1482                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1483                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1484                                         NULL,
1485                                 /*134*/ NULL,
1486                                         NULL,
1487                                         NULL,
1488                                         NULL,
1489                                         NULL,
1490                                 /*139*/ NULL,
1491                                         NULL,
1492                                         NULL,
1493                                         NULL,
1494                                         NULL,
1495                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1496                                 /*145*/ NULL,
1497                                         NULL,
1498                                         NULL,
1499                                         NULL,
1500                                         NULL
1501 };
1502 
1503 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1504 {
1505   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1506   PetscBool      iscuda;
1507 
1508   PetscFunctionBegin;
1509   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1510   PetscCall(PetscLayoutSetUp(mat->rmap));
1511   PetscCall(PetscLayoutSetUp(mat->cmap));
1512   if (!a->A) {
1513     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1514     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1515     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1516   }
1517   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1518   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1519   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1520   mat->preallocated = PETSC_TRUE;
1521   mat->assembled = PETSC_TRUE;
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1526 {
1527   Mat            B,C;
1528 
1529   PetscFunctionBegin;
1530   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1531   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1532   PetscCall(MatDestroy(&C));
1533   if (reuse == MAT_REUSE_MATRIX) {
1534     C = *newmat;
1535   } else C = NULL;
1536   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1537   PetscCall(MatDestroy(&B));
1538   if (reuse == MAT_INPLACE_MATRIX) {
1539     PetscCall(MatHeaderReplace(A,&C));
1540   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1545 {
1546   Mat            B,C;
1547 
1548   PetscFunctionBegin;
1549   PetscCall(MatDenseGetLocalMatrix(A,&C));
1550   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1551   if (reuse == MAT_REUSE_MATRIX) {
1552     C = *newmat;
1553   } else C = NULL;
1554   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1555   PetscCall(MatDestroy(&B));
1556   if (reuse == MAT_INPLACE_MATRIX) {
1557     PetscCall(MatHeaderReplace(A,&C));
1558   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1559   PetscFunctionReturn(0);
1560 }
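/*
   Usage sketch (not part of the original source): the two conversion routines above are
   normally reached through the generic MatConvert() interface. Assuming an assembled
   MATMPIAIJ matrix A, a dense copy can be made and converted back to sparse form:

     Mat Adense,Aaij;
     PetscCall(MatConvert(A,MATMPIDENSE,MAT_INITIAL_MATRIX,&Adense));   // sparse -> dense
     PetscCall(MatConvert(Adense,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij));  // dense -> sparse
     PetscCall(MatDestroy(&Aaij));
     PetscCall(MatDestroy(&Adense));
*/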
1561 
1562 #if defined(PETSC_HAVE_ELEMENTAL)
1563 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1564 {
1565   Mat            mat_elemental;
1566   PetscScalar    *v;
1567   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1568 
1569   PetscFunctionBegin;
1570   if (reuse == MAT_REUSE_MATRIX) {
1571     mat_elemental = *newmat;
1572     PetscCall(MatZeroEntries(*newmat));
1573   } else {
1574     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1575     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1576     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1577     PetscCall(MatSetUp(mat_elemental));
1578     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1579   }
1580 
1581   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1582   for (i=0; i<N; i++) cols[i] = i;
1583   for (i=0; i<m; i++) rows[i] = rstart + i;
1584 
1585   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1586   PetscCall(MatDenseGetArray(A,&v));
1587   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1588   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1589   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatDenseRestoreArray(A,&v));
1591   PetscCall(PetscFree2(rows,cols));
1592 
1593   if (reuse == MAT_INPLACE_MATRIX) {
1594     PetscCall(MatHeaderReplace(A,&mat_elemental));
1595   } else {
1596     *newmat = mat_elemental;
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 #endif
1601 
1602 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1603 {
1604   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1605 
1606   PetscFunctionBegin;
1607   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1612 {
1613   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1614 
1615   PetscFunctionBegin;
1616   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1621 {
1622   Mat_MPIDense   *mat;
1623   PetscInt       m,nloc,N;
1624 
1625   PetscFunctionBegin;
1626   PetscCall(MatGetSize(inmat,&m,&N));
1627   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1628   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1629     PetscInt sum;
1630 
1631     if (n == PETSC_DECIDE) {
1632       PetscCall(PetscSplitOwnership(comm,&n,&N));
1633     }
1634     /* Check sum(n) = N */
1635     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1636     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1637 
1638     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1639     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1640   }
1641 
1642   /* numeric phase */
1643   mat = (Mat_MPIDense*)(*outmat)->data;
1644   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1645   PetscFunctionReturn(0);
1646 }
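/*
   Usage sketch (not part of the original source): each rank contributes a sequential dense
   block of m local rows and N (global) columns, and the blocks are stacked by rank. For two
   ranks each holding a 4x6 MATSEQDENSE matrix inmat,

     Mat C;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C));

   produces an 8x6 MATMPIDENSE matrix with 3 local columns per rank, consistent with the
   check above that the local column counts sum to N.
*/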
1647 
1648 #if defined(PETSC_HAVE_CUDA)
1649 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1650 {
1651   Mat            B;
1652   Mat_MPIDense   *m;
1653 
1654   PetscFunctionBegin;
1655   if (reuse == MAT_INITIAL_MATRIX) {
1656     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1657   } else if (reuse == MAT_REUSE_MATRIX) {
1658     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1659   }
1660 
1661   B    = *newmat;
1662   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1663   PetscCall(PetscFree(B->defaultvectype));
1664   PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1665   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1666   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1667   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1669   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1670   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1672   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1673   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1674   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1675   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1676   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1677   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1678   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1679   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1680   m    = (Mat_MPIDense*)(B)->data;
1681   if (m->A) {
1682     PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1683   }
1684   B->ops->bindtocpu = NULL;
1685   B->offloadmask    = PETSC_OFFLOAD_CPU;
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1690 {
1691   Mat            B;
1692   Mat_MPIDense   *m;
1693 
1694   PetscFunctionBegin;
1695   if (reuse == MAT_INITIAL_MATRIX) {
1696     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1697   } else if (reuse == MAT_REUSE_MATRIX) {
1698     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1699   }
1700 
1701   B    = *newmat;
1702   PetscCall(PetscFree(B->defaultvectype));
1703   PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1704   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1705   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1706   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1707   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1708   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1709   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1710   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1711   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1712   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1713   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1714   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1715   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1716   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1717   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1718   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1719   m    = (Mat_MPIDense*)(B->data);
1720   if (m->A) {
1721     PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1722     B->offloadmask = PETSC_OFFLOAD_BOTH;
1723   } else {
1724     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1725   }
1726   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1727 
1728   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1729   PetscFunctionReturn(0);
1730 }
1731 #endif
1732 
1733 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1734 {
1735   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1736   PetscInt       lda;
1737 
1738   PetscFunctionBegin;
1739   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1740   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1741   if (!a->cvec) {
1742     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1743   }
1744   a->vecinuse = col + 1;
1745   PetscCall(MatDenseGetLDA(a->A,&lda));
1746   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1747   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1748   *v   = a->cvec;
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1753 {
1754   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1755 
1756   PetscFunctionBegin;
1757   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1758   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1759   a->vecinuse = 0;
1760   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1761   PetscCall(VecResetArray(a->cvec));
1762   *v   = NULL;
1763   PetscFunctionReturn(0);
1764 }
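/*
   Usage sketch (not part of the original source): column vectors are accessed through the
   generic MatDenseGetColumnVec()/MatDenseRestoreColumnVec() interface, which dispatches to
   the routines above for MATMPIDENSE. Assuming an assembled dense matrix A:

     Vec       col;
     PetscReal nrm;
     PetscCall(MatDenseGetColumnVec(A,0,&col));     // borrow column 0 of A as a Vec
     PetscCall(VecNorm(col,NORM_2,&nrm));
     PetscCall(MatDenseRestoreColumnVec(A,0,&col)); // must be restored before the next access
*/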
1765 
1766 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1767 {
1768   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1769   PetscInt       lda;
1770 
1771   PetscFunctionBegin;
1772   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1773   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1774   if (!a->cvec) {
1775     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1776   }
1777   a->vecinuse = col + 1;
1778   PetscCall(MatDenseGetLDA(a->A,&lda));
1779   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1780   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1781   PetscCall(VecLockReadPush(a->cvec));
1782   *v   = a->cvec;
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1787 {
1788   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1789 
1790   PetscFunctionBegin;
1791   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1792   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1793   a->vecinuse = 0;
1794   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1795   PetscCall(VecLockReadPop(a->cvec));
1796   PetscCall(VecResetArray(a->cvec));
1797   *v   = NULL;
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1802 {
1803   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1804   PetscInt       lda;
1805 
1806   PetscFunctionBegin;
1807   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1808   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1809   if (!a->cvec) {
1810     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1811   }
1812   a->vecinuse = col + 1;
1813   PetscCall(MatDenseGetLDA(a->A,&lda));
1814   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1815   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1816   *v   = a->cvec;
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1821 {
1822   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1823 
1824   PetscFunctionBegin;
1825   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1826   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1827   a->vecinuse = 0;
1828   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1829   PetscCall(VecResetArray(a->cvec));
1830   *v   = NULL;
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v)
1835 {
1836   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1837   Mat_MPIDense *c;
1838   MPI_Comm     comm;
1839   PetscInt     pbegin, pend;
1840 
1841   PetscFunctionBegin;
1842   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1843   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1844   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1845   pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart);
1846   pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart));
1847   if (!a->cmat) {
1848     PetscCall(MatCreate(comm,&a->cmat));
1849     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1850     PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
1851     if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1852     else {
1853       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
1854       PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
1855       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1856     }
1857     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1858     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1859   } else {
1860     PetscBool same = (PetscBool)(rend-rbegin == a->cmat->rmap->N);
1861     if (same && a->cmat->rmap->N != A->rmap->N) {
1862       same = (PetscBool)(pend-pbegin == a->cmat->rmap->n);
1863       PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1864     }
1865     if (!same) {
1866       PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
1867       PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap));
1868       PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
1869       PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
1870       PetscCall(PetscLayoutSetUp(a->cmat->rmap));
1871     }
1872     if (cend-cbegin != a->cmat->cmap->N) {
1873       PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1874       PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
1875       PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1876       PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1877     }
1878   }
1879   c = (Mat_MPIDense*)a->cmat->data;
1880   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1881   PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A));
1882   a->cmat->preallocated = PETSC_TRUE;
1883   a->cmat->assembled = PETSC_TRUE;
1884   a->matinuse = cbegin + 1;
1885   *v = a->cmat;
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1890 {
1891   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1892   Mat_MPIDense *c;
1893 
1894   PetscFunctionBegin;
1895   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1896   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1897   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1898   a->matinuse = 0;
1899   c = (Mat_MPIDense*)a->cmat->data;
1900   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1901   *v = NULL;
1902   PetscFunctionReturn(0);
1903 }
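/*
   Usage sketch (not part of the original source): a contiguous block of rows and columns can
   be viewed as a matrix without copying; only one such view may be outstanding at a time, as
   enforced by the matinuse checks above. The global index ranges rbegin..rend and
   cbegin..cend are assumed valid for A.

     Mat S;
     PetscCall(MatDenseGetSubMatrix(A,rbegin,rend,cbegin,cend,&S));
     PetscCall(MatScale(S,2.0));                 // operates on the view, i.e. modifies A
     PetscCall(MatDenseRestoreSubMatrix(A,&S));
*/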
1904 
1905 /*MC
1906    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1907 
1908    Options Database Keys:
1909 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1910 
1911   Level: beginner
1912 
1913 .seealso: `MatCreateDense()`
1914 
1915 M*/
1916 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1917 {
1918   Mat_MPIDense   *a;
1919 
1920   PetscFunctionBegin;
1921   PetscCall(PetscNewLog(mat,&a));
1922   mat->data = (void*)a;
1923   PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1924 
1925   mat->insertmode = NOT_SET_VALUES;
1926 
1927   /* build cache for off array entries formed */
1928   a->donotstash = PETSC_FALSE;
1929 
1930   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1931 
1932   /* stuff used for matrix vector multiply */
1933   a->lvec        = NULL;
1934   a->Mvctx       = NULL;
1935   a->roworiented = PETSC_TRUE;
1936 
1937   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1938   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1939   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1940   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1941   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1942   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1943   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1944   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1945   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1946   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1947   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1948   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1949   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1950   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1951   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1952   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1953   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1954   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1955   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1956   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1957   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1958 #if defined(PETSC_HAVE_ELEMENTAL)
1959   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1960 #endif
1961 #if defined(PETSC_HAVE_SCALAPACK)
1962   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1963 #endif
1964 #if defined(PETSC_HAVE_CUDA)
1965   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1966 #endif
1967   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1968   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1969   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1970 #if defined(PETSC_HAVE_CUDA)
1971   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1972   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1973 #endif
1974 
1975   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1976   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1977   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 /*MC
1982    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1983 
1984    Options Database Keys:
1985 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1986 
1987   Level: beginner
1988 
1989 .seealso: `MATMPIDENSE`, `MATSEQDENSECUDA`, `MATDENSECUDA`
1990 
1991 M*/
1992 #if defined(PETSC_HAVE_CUDA)
1993 #include <petsc/private/deviceimpl.h>
1994 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1995 {
1996   PetscFunctionBegin;
1997   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
1998   PetscCall(MatCreate_MPIDense(B));
1999   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
2000   PetscFunctionReturn(0);
2001 }
2002 #endif
2003 
2004 /*MC
2005    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2006 
2007    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2008    and MATMPIDENSE otherwise.
2009 
2010    Options Database Keys:
2011 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2012 
2013   Level: beginner
2014 
2015 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
2016 M*/
2017 
2018 /*MC
2019    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2020 
2021    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2022    and MATMPIDENSECUDA otherwise.
2023 
2024    Options Database Keys:
2025 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2026 
2027   Level: beginner
2028 
2029 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
2030 M*/
2031 
2032 /*@C
2033    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2034 
2035    Collective
2036 
2037    Input Parameters:
2038 +  B - the matrix
2039 -  data - optional location of matrix data.  Set data=NULL for PETSc
2040    to control all matrix memory allocation.
2041 
2042    Notes:
2043    The dense format is fully compatible with standard Fortran 77
2044    storage by columns.
2045 
2046    The data input variable is intended primarily for Fortran programmers
2047    who wish to allocate their own matrix memory space.  Most users should
2048    set data=NULL.
2049 
2050    Level: intermediate
2051 
2052 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2053 @*/
2054 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2055 {
2056   PetscFunctionBegin;
2057   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2058   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
2059   PetscFunctionReturn(0);
2060 }
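/*
   Usage sketch (not part of the original source): typical creation of a MATMPIDENSE matrix
   with PETSc-managed storage, for assumed global sizes M and N. Passing data=NULL lets PETSc
   allocate the local column-major array.

     Mat B;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&B));
     PetscCall(MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N));
     PetscCall(MatSetType(B,MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(B,NULL));
*/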
2061 
2062 /*@
2063    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2064    array provided by the user. This is useful to avoid copying an array
2065    into a matrix.
2066 
2067    Not Collective
2068 
2069    Input Parameters:
2070 +  mat - the matrix
2071 -  array - the array in column major order
2072 
2073    Notes:
2074    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2075    freed when the matrix is destroyed.
2076 
2077    Level: developer
2078 
2079 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2080 
2081 @*/
2082 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2083 {
2084   PetscFunctionBegin;
2085   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2086   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2087   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2088 #if defined(PETSC_HAVE_CUDA)
2089   mat->offloadmask = PETSC_OFFLOAD_CPU;
2090 #endif
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 /*@
2095    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
2096 
2097    Not Collective
2098 
2099    Input Parameters:
2100 .  mat - the matrix
2101 
2102    Notes:
2103    You can only call this after a call to MatDensePlaceArray()
2104 
2105    Level: developer
2106 
2107 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2108 
2109 @*/
2110 PetscErrorCode  MatDenseResetArray(Mat mat)
2111 {
2112   PetscFunctionBegin;
2113   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2114   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
2115   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2116   PetscFunctionReturn(0);
2117 }
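/*
   Usage sketch (not part of the original source): pairing MatDensePlaceArray() with
   MatDenseResetArray(). The caller-owned buffer myarray (column major, sized to hold the
   local portion of A) temporarily backs the matrix.

     PetscCall(MatDensePlaceArray(A,myarray));
     // ... use A; its entries now live in myarray ...
     PetscCall(MatDenseResetArray(A));           // A returns to its original storage
     // myarray remains owned by the caller and must be freed by the caller
*/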
2118 
2119 /*@
2120    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2121    array provided by the user. This is useful to avoid copying an array
2122    into a matrix.
2123 
2124    Not Collective
2125 
2126    Input Parameters:
2127 +  mat - the matrix
2128 -  array - the array in column major order
2129 
2130    Notes:
2131    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2132    freed by the user. It will be freed when the matrix is destroyed.
2133 
2134    Level: developer
2135 
2136 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2137 @*/
2138 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2139 {
2140   PetscFunctionBegin;
2141   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2142   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2143   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2144 #if defined(PETSC_HAVE_CUDA)
2145   mat->offloadmask = PETSC_OFFLOAD_CPU;
2146 #endif
2147   PetscFunctionReturn(0);
2148 }
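/*
   Usage sketch (not part of the original source): unlike MatDensePlaceArray(), the buffer
   handed to MatDenseReplaceArray() is adopted by the matrix, so it must come from
   PetscMalloc() and must not be freed by the caller. Here nlocal is the assumed size of the
   local column-major block.

     PetscScalar *arr;
     PetscCall(PetscMalloc1(nlocal,&arr));
     // ... fill arr ...
     PetscCall(MatDenseReplaceArray(A,arr));     // A now owns arr; do not PetscFree(arr)
*/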
2149 
2150 #if defined(PETSC_HAVE_CUDA)
2151 /*@C
2152    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2153    array provided by the user. This is useful to avoid copying an array
2154    into a matrix.
2155 
2156    Not Collective
2157 
2158    Input Parameters:
2159 +  mat - the matrix
2160 -  array - the array in column major order
2161 
2162    Notes:
2163    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2164    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2165 
2166    Level: developer
2167 
2168 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2169 @*/
2170 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2171 {
2172   PetscFunctionBegin;
2173   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2174   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2175   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2176   mat->offloadmask = PETSC_OFFLOAD_GPU;
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 /*@C
2181    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2182 
2183    Not Collective
2184 
2185    Input Parameters:
2186 .  mat - the matrix
2187 
2188    Notes:
2189    You can only call this after a call to MatDenseCUDAPlaceArray()
2190 
2191    Level: developer
2192 
2193 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2194 
2195 @*/
2196 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2197 {
2198   PetscFunctionBegin;
2199   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2200   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2201   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2202   PetscFunctionReturn(0);
2203 }
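/*
   Usage sketch (not part of the original source; PetscCallCUDA assumed available in this
   PETSc version): the buffer passed to MatDenseCUDAPlaceArray() must be device memory,
   e.g. obtained with cudaMalloc(), and it remains owned by the caller.

     PetscScalar *d_arr;
     PetscCallCUDA(cudaMalloc((void**)&d_arr,bytes));  // bytes = size of the local block
     PetscCall(MatDenseCUDAPlaceArray(A,d_arr));
     // ... use A on the GPU ...
     PetscCall(MatDenseCUDAResetArray(A));
     PetscCallCUDA(cudaFree(d_arr));
*/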
2204 
2205 /*@C
2206    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2207    array provided by the user. This is useful to avoid copying an array
2208    into a matrix.
2209 
2210    Not Collective
2211 
2212    Input Parameters:
2213 +  mat - the matrix
2214 -  array - the array in column major order
2215 
2216    Notes:
2217    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2218    The memory passed in CANNOT be freed by the user. It will be freed
2219    when the matrix is destroyed. The array should respect the matrix leading dimension.
2220 
2221    Level: developer
2222 
2223 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2224 @*/
2225 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2226 {
2227   PetscFunctionBegin;
2228   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2229   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2230   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2231   mat->offloadmask = PETSC_OFFLOAD_GPU;
2232   PetscFunctionReturn(0);
2233 }
2234 
2235 /*@C
2236    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2237 
2238    Not Collective
2239 
2240    Input Parameters:
2241 .  A - the matrix
2242 
2243    Output Parameter:
2244 .  array - the GPU array in column major order
2245 
2246    Notes:
2247    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2248 
2249    Level: developer
2250 
2251 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2252 @*/
2253 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2254 {
2255   PetscFunctionBegin;
2256   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2257   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2258   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 /*@C
2263    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2264 
2265    Not Collective
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 -  array - the GPU array in column major order
2270 
2271    Notes:
2272 
2273    Level: developer
2274 
2275 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2276 @*/
2277 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2278 {
2279   PetscFunctionBegin;
2280   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2281   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2282   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2283   A->offloadmask = PETSC_OFFLOAD_GPU;
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 /*@C
2288    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2289 
2290    Not Collective
2291 
2292    Input Parameters:
2293 .  A - the matrix
2294 
2295    Output Parameter:
2296 .  array - the GPU array in column major order
2297 
2298    Notes:
2299    Data may be copied to the GPU, since operations done on the CPU may have left the CPU copy more recent. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2300 
2301    Level: developer
2302 
2303 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2304 @*/
2305 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2306 {
2307   PetscFunctionBegin;
2308   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2309   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 /*@C
2314    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2315 
2316    Not Collective
2317 
2318    Input Parameters:
2319 +  A - the matrix
2320 -  array - the GPU array in column major order
2321 
2322    Notes:
2323    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2324 
2325    Level: developer
2326 
2327 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2328 @*/
2329 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2330 {
2331   PetscFunctionBegin;
2332   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 /*@C
2337    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2338 
2339    Not Collective
2340 
2341    Input Parameters:
2342 .  A - the matrix
2343 
2344    Output Parameter:
2345 .  array - the GPU array in column major order
2346 
2347    Notes:
2348    Data may be copied to the GPU, since operations done on the CPU may have left the CPU copy more recent. If you need write-only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2349 
2350    Level: developer
2351 
2352 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2353 @*/
2354 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2355 {
2356   PetscFunctionBegin;
2357   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2358   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2359   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 /*@C
2364    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2365 
2366    Not Collective
2367 
2368    Input Parameters:
2369 +  A - the matrix
2370 -  array - the GPU array in column major order
2371 
2372    Notes:
2373 
2374    Level: developer
2375 
2376 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2377 @*/
2378 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2379 {
2380   PetscFunctionBegin;
2381   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2382   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2383   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2384   A->offloadmask = PETSC_OFFLOAD_GPU;
2385   PetscFunctionReturn(0);
2386 }
2387 #endif
2388 
2389 /*@C
2390    MatCreateDense - Creates a matrix in dense format.
2391 
2392    Collective
2393 
2394    Input Parameters:
2395 +  comm - MPI communicator
2396 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2397 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2398 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2399 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2400 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2401    to control all matrix memory allocation.
2402 
2403    Output Parameter:
2404 .  A - the matrix
2405 
2406    Notes:
2407    The dense format is fully compatible with standard Fortran 77
2408    storage by columns.
2409    Note that, although local portions of the matrix are stored in column-major
2410    order, the matrix is partitioned across MPI ranks by row.
2411 
2412    The data input variable is intended primarily for Fortran programmers
2413    who wish to allocate their own matrix memory space.  Most users should
2414    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2415 
2416    The user MUST specify either the local or global matrix dimensions
2417    (possibly both).
2418 
2419    Level: intermediate
2420 
2421 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2422 @*/
2423 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2424 {
2425   PetscFunctionBegin;
2426   PetscCall(MatCreate(comm,A));
2427   PetscCall(MatSetSizes(*A,m,n,M,N));
2428   PetscCall(MatSetType(*A,MATDENSE));
2429   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2430   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2431   PetscFunctionReturn(0);
2432 }
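/*
   Usage sketch (not part of the original source): creating a distributed dense matrix with
   assumed global sizes M and N and filling the locally owned rows.

     Mat      A;
     PetscInt rstart,rend,i,j;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
     PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) PetscCall(MatSetValue(A,i,j,(PetscScalar)(i+j),INSERT_VALUES));
     }
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/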
2433 
2434 #if defined(PETSC_HAVE_CUDA)
2435 /*@C
2436    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2437 
2438    Collective
2439 
2440    Input Parameters:
2441 +  comm - MPI communicator
2442 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2443 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2444 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2445 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2446 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2447    to control matrix memory allocation.
2448 
2449    Output Parameter:
2450 .  A - the matrix
2451 
2452    Notes:
2453 
2454    Level: intermediate
2455 
2456 .seealso: `MatCreate()`, `MatCreateDense()`
2457 @*/
2458 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2459 {
2460   PetscFunctionBegin;
2461   PetscCall(MatCreate(comm,A));
2462   PetscValidLogicalCollectiveBool(*A,!!data,6);
2463   PetscCall(MatSetSizes(*A,m,n,M,N));
2464   PetscCall(MatSetType(*A,MATDENSECUDA));
2465   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2466   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2467   PetscFunctionReturn(0);
2468 }
2469 #endif
2470 
2471 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2472 {
2473   Mat            mat;
2474   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2475 
2476   PetscFunctionBegin;
2477   *newmat = NULL;
2478   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2479   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2480   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2481   a       = (Mat_MPIDense*)mat->data;
2482 
2483   mat->factortype   = A->factortype;
2484   mat->assembled    = PETSC_TRUE;
2485   mat->preallocated = PETSC_TRUE;
2486 
2487   mat->insertmode = NOT_SET_VALUES;
2488   a->donotstash   = oldmat->donotstash;
2489 
2490   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2491   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2492 
2493   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2494   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2495 
2496   *newmat = mat;
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2501 {
2502   PetscBool      isbinary;
2503 #if defined(PETSC_HAVE_HDF5)
2504   PetscBool      ishdf5;
2505 #endif
2506 
2507   PetscFunctionBegin;
2508   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2509   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2510   /* force binary viewer to load .info file if it has not yet done so */
2511   PetscCall(PetscViewerSetUp(viewer));
2512   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2513 #if defined(PETSC_HAVE_HDF5)
2514   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2515 #endif
2516   if (isbinary) {
2517     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2518 #if defined(PETSC_HAVE_HDF5)
2519   } else if (ishdf5) {
2520     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2521 #endif
2522   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2527 {
2528   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2529   Mat            a,b;
2530 
2531   PetscFunctionBegin;
2532   a    = matA->A;
2533   b    = matB->A;
2534   PetscCall(MatEqual(a,b,flag));
2535   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2540 {
2541   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2542 
2543   PetscFunctionBegin;
2544   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2545   PetscCall(MatDestroy(&atb->atb));
2546   PetscCall(PetscFree(atb));
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2551 {
2552   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2553 
2554   PetscFunctionBegin;
2555   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2556   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2557   PetscCall(PetscFree(abt));
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2562 {
2563   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2564   Mat_TransMatMultDense *atb;
2565   MPI_Comm              comm;
2566   PetscMPIInt           size,*recvcounts;
2567   PetscScalar           *carray,*sendbuf;
2568   const PetscScalar     *atbarray;
2569   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2570   const PetscInt        *ranges;
2571 
2572   PetscFunctionBegin;
2573   MatCheckProduct(C,3);
2574   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2575   atb = (Mat_TransMatMultDense *)C->product->data;
2576   recvcounts = atb->recvcounts;
2577   sendbuf = atb->sendbuf;
2578 
2579   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2580   PetscCallMPI(MPI_Comm_size(comm,&size));
2581 
2582   /* compute atb->atb = aseq^T * bseq (the full-sized local contribution) */
2583   PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2584 
2585   PetscCall(MatGetOwnershipRanges(C,&ranges));
2586 
2587   /* pack atb->atb into sendbuf: rows owned by each rank (per C's row layout) are grouped contiguously, column-major within each group */
2588   PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
2589   PetscCall(MatDenseGetLDA(atb->atb,&lda));
2590   for (proc=0, k=0; proc<size; proc++) {
2591     for (j=0; j<cN; j++) {
2592       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2593     }
2594   }
2595   PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2596 
2597   /* sum the contributions from all ranks and scatter each rank's owned rows into the local part of C */
2598   PetscCall(MatDenseGetArrayWrite(c->A,&carray));
2599   PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2600   PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
2601   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2602   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2603   PetscFunctionReturn(0);
2604 }
2605 
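/*
   Symbolic phase of C = A^T * B: sets the sizes and type of C and allocates the
   reusable work data. sendbuf holds a full M_C x N_C contribution (M_C = global
   columns of A, N_C = global columns of B) and recvcounts[p] is the number of
   entries of C owned by rank p, i.e. (local rows of C on p) * N_C.

   A user typically reaches this code through MatTransposeMatMult(); a minimal
   sketch, assuming A and B are assembled MPIDENSE matrices with identical row
   layouts:

     Mat C;
     PetscCall(MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
*/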
2606 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2607 {
2608   MPI_Comm              comm;
2609   PetscMPIInt           size;
2610   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2611   Mat_TransMatMultDense *atb;
2612   PetscBool             cisdense;
2613   PetscInt              i;
2614   const PetscInt        *ranges;
2615 
2616   PetscFunctionBegin;
2617   MatCheckProduct(C,3);
2618   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2619   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2620   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2621     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2622   }
2623 
2624   /* create matrix product C */
2625   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2626   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2627   if (!cisdense) {
2628     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2629   }
2630   PetscCall(MatSetUp(C));
2631 
2632   /* create the data structure reused by the numeric phase */
2633   PetscCallMPI(MPI_Comm_size(comm,&size));
2634   PetscCall(PetscNew(&atb));
2635   cM   = C->rmap->N;
2636   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2637   PetscCall(MatGetOwnershipRanges(C,&ranges));
2638   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2639 
2640   C->product->data    = atb;
2641   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2642   PetscFunctionReturn(0);
2643 }
2644 
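/*
   Symbolic phase of C = A * B^T. Two algorithms are supported, selected through the
   product algorithm name: "allgatherv" (the default, alg = 0) gathers all of B^T onto
   every rank, so buf[0] holds the local transpose (local rows of B times the number of
   columns) and buf[1] the gathered global transpose; "cyclic" (alg = 1) circulates the
   row blocks of B around a ring, so both buffers are sized for the largest row block
   of B times the number of columns.
*/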
2645 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2646 {
2647   MPI_Comm              comm;
2648   PetscMPIInt           i, size;
2649   PetscInt              maxRows, bufsiz;
2650   PetscMPIInt           tag;
2651   PetscInt              alg;
2652   Mat_MatTransMultDense *abt;
2653   Mat_Product           *product = C->product;
2654   PetscBool             flg;
2655 
2656   PetscFunctionBegin;
2657   MatCheckProduct(C,4);
2658   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2659   /* check local size of A and B */
2660   PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);
2661 
2662   PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
2663   alg  = flg ? 0 : 1;
2664 
2665   /* setup matrix product C */
2666   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
2667   PetscCall(MatSetType(C,MATMPIDENSE));
2668   PetscCall(MatSetUp(C));
2669   PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));
2670 
2671   /* create the data structure reused by the numeric phase */
2672   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2673   PetscCallMPI(MPI_Comm_size(comm,&size));
2674   PetscCall(PetscNew(&abt));
2675   abt->tag = tag;
2676   abt->alg = alg;
2677   switch (alg) {
2678   case 1: /* alg: "cyclic" */
2679     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2680     bufsiz = A->cmap->N * maxRows;
2681     PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
2682     break;
2683   default: /* alg: "allgatherv" */
2684     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2685     PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
2686     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2687     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2688     break;
2689   }
2690 
2691   C->product->data    = abt;
2692   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2693   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2694   PetscFunctionReturn(0);
2695 }
2696 
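/*
   Numeric phase of C = A * B^T using the "cyclic" algorithm. Each rank keeps its local
   rows of A in place and circulates the (transposed) local row blocks of B around a
   ring: in step i it multiplies A_loc against the block currently held in sendbuf,
   filling the corresponding column block of its local rows of C, while the next block
   is already in flight via nonblocking MPI_Isend/MPI_Irecv, so communication overlaps
   with the local GEMMs.
*/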
2697 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2698 {
2699   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2700   Mat_MatTransMultDense *abt;
2701   MPI_Comm              comm;
2702   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2703   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2704   PetscInt              i,cK=A->cmap->N,k,j,bn;
2705   PetscScalar           _DOne=1.0,_DZero=0.0;
2706   const PetscScalar     *av,*bv;
2707   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2708   MPI_Request           reqs[2];
2709   const PetscInt        *ranges;
2710 
2711   PetscFunctionBegin;
2712   MatCheckProduct(C,3);
2713   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2714   abt  = (Mat_MatTransMultDense*)C->product->data;
2715   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2716   PetscCallMPI(MPI_Comm_rank(comm,&rank));
2717   PetscCallMPI(MPI_Comm_size(comm,&size));
2718   PetscCall(MatDenseGetArrayRead(a->A,&av));
2719   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2720   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2721   PetscCall(MatDenseGetLDA(a->A,&i));
2722   PetscCall(PetscBLASIntCast(i,&alda));
2723   PetscCall(MatDenseGetLDA(b->A,&i));
2724   PetscCall(PetscBLASIntCast(i,&blda));
2725   PetscCall(MatDenseGetLDA(c->A,&i));
2726   PetscCall(PetscBLASIntCast(i,&clda));
2727   PetscCall(MatGetOwnershipRanges(B,&ranges));
2728   bn   = B->rmap->n;
2729   if (blda == bn) {
2730     sendbuf = (PetscScalar*)bv;
2731   } else {
2732     sendbuf = abt->buf[0];
2733     for (k = 0, i = 0; i < cK; i++) {
2734       for (j = 0; j < bn; j++, k++) {
2735         sendbuf[k] = bv[i * blda + j];
2736       }
2737     }
2738   }
2739   if (size > 1) {
2740     sendto = (rank + size - 1) % size;
2741     recvfrom = (rank + size + 1) % size;
2742   } else {
2743     sendto = recvfrom = 0;
2744   }
2745   PetscCall(PetscBLASIntCast(cK,&ck));
2746   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2747   recvisfrom = rank;
2748   for (i = 0; i < size; i++) {
2749     /* the previous receive and send have completed, so the buffers can be read and modified */
2750     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process from which the next recvbuf will originate */
2751     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2752 
2753     if (nextrecvisfrom != rank) {
2754       /* start the cyclic exchange: send the current sendbuf and receive the next block into recvbuf (which becomes sendbuf on the next iteration) */
2755       sendsiz = cK * bn;
2756       recvsiz = cK * nextbn;
2757       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2758       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2759       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2760     }
2761 
2762     /* local aseq * sendbuf^T */
2763     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2764     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2765 
2766     if (nextrecvisfrom != rank) {
2767       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2768       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2769     }
2770     bn = nextbn;
2771     recvisfrom = nextrecvisfrom;
2772     sendbuf = recvbuf;
2773   }
2774   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2775   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2776   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2777   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2778   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2779   PetscFunctionReturn(0);
2780 }
2781 
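/*
   Numeric phase of C = A * B^T using the "allgatherv" algorithm. Each rank packs the
   transpose of its local rows of B into buf[0], an MPI_Allgatherv assembles the full
   B^T (column-major with leading dimension equal to the number of columns of B) in
   buf[1] on every rank, and a single GEMM with the local rows of A produces the owned
   rows of C.
*/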
2782 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2783 {
2784   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2785   Mat_MatTransMultDense *abt;
2786   MPI_Comm              comm;
2787   PetscMPIInt           size;
2788   PetscScalar           *cv, *sendbuf, *recvbuf;
2789   const PetscScalar     *av,*bv;
2790   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2791   PetscScalar           _DOne=1.0,_DZero=0.0;
2792   PetscBLASInt          cm, cn, ck, alda, clda;
2793 
2794   PetscFunctionBegin;
2795   MatCheckProduct(C,3);
2796   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2797   abt  = (Mat_MatTransMultDense*)C->product->data;
2798   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2799   PetscCallMPI(MPI_Comm_size(comm,&size));
2800   PetscCall(MatDenseGetArrayRead(a->A,&av));
2801   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2802   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2803   PetscCall(MatDenseGetLDA(a->A,&i));
2804   PetscCall(PetscBLASIntCast(i,&alda));
2805   PetscCall(MatDenseGetLDA(b->A,&blda));
2806   PetscCall(MatDenseGetLDA(c->A,&i));
2807   PetscCall(PetscBLASIntCast(i,&clda));
2808   /* copy transpose of B into buf[0] */
2809   bn      = B->rmap->n;
2810   sendbuf = abt->buf[0];
2811   recvbuf = abt->buf[1];
2812   for (k = 0, j = 0; j < bn; j++) {
2813     for (i = 0; i < cK; i++, k++) {
2814       sendbuf[k] = bv[i * blda + j];
2815     }
2816   }
2817   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2818   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2819   PetscCall(PetscBLASIntCast(cK,&ck));
2820   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2821   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2822   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2823   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2825   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2826   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2827   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2832 {
2833   Mat_MatTransMultDense *abt;
2834 
2835   PetscFunctionBegin;
2836   MatCheckProduct(C,3);
2837   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2838   abt = (Mat_MatTransMultDense*)C->product->data;
2839   switch (abt->alg) {
2840   case 1:
2841     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2842     break;
2843   default:
2844     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2845     break;
2846   }
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2851 {
2852   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2853 
2854   PetscFunctionBegin;
2855   PetscCall(MatDestroy(&ab->Ce));
2856   PetscCall(MatDestroy(&ab->Ae));
2857   PetscCall(MatDestroy(&ab->Be));
2858   PetscCall(PetscFree(ab));
2859   PetscFunctionReturn(0);
2860 }
2861 
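/*
   When PETSc is configured with Elemental, the parallel dense product C = A * B is
   delegated to Elemental: A and B are converted to MATELEMENTAL copies (Ae, Be), the
   product is carried out there, and the Elemental result Ce is converted back into
   the MPIDENSE matrix C on each numeric call.
*/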
2862 #if defined(PETSC_HAVE_ELEMENTAL)
2863 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2864 {
2865   Mat_MatMultDense *ab;
2866 
2867   PetscFunctionBegin;
2868   MatCheckProduct(C,3);
2869   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2870   ab   = (Mat_MatMultDense*)C->product->data;
2871   PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2872   PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2873   PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2874   PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2875   PetscFunctionReturn(0);
2876 }
2877 
2878 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2879 {
2880   Mat              Ae,Be,Ce;
2881   Mat_MatMultDense *ab;
2882 
2883   PetscFunctionBegin;
2884   MatCheckProduct(C,4);
2885   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2886   /* check local size of A and B */
2887   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2888     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2889   }
2890 
2891   /* create elemental matrices Ae and Be */
2892   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2893   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2894   PetscCall(MatSetType(Ae,MATELEMENTAL));
2895   PetscCall(MatSetUp(Ae));
2896   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2897 
2898   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2899   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2900   PetscCall(MatSetType(Be,MATELEMENTAL));
2901   PetscCall(MatSetUp(Be));
2902   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2903 
2904   /* compute symbolic Ce = Ae*Be */
2905   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2906   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2907 
2908   /* setup C */
2909   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2910   PetscCall(MatSetType(C,MATDENSE));
2911   PetscCall(MatSetUp(C));
2912 
2913   /* create the data structure reused by the numeric phase */
2914   PetscCall(PetscNew(&ab));
2915   ab->Ae = Ae;
2916   ab->Be = Be;
2917   ab->Ce = Ce;
2918 
2919   C->product->data    = ab;
2920   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2921   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2922   PetscFunctionReturn(0);
2923 }
2924 #endif
2925 /* ----------------------------------------------- */
2926 #if defined(PETSC_HAVE_ELEMENTAL)
2927 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2928 {
2929   PetscFunctionBegin;
2930   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2931   C->ops->productsymbolic = MatProductSymbolic_AB;
2932   PetscFunctionReturn(0);
2933 }
2934 #endif
2935 
2936 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2937 {
2938   Mat_Product *product = C->product;
2939   Mat         A = product->A,B=product->B;
2940 
2941   PetscFunctionBegin;
2942   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2943     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2944   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2945   C->ops->productsymbolic = MatProductSymbolic_AtB;
2946   PetscFunctionReturn(0);
2947 }
2948 
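/*
   Option handling for the A * B^T product: the algorithm defaults to "allgatherv" and
   can be switched to "cyclic" at runtime with either of the options registered below,
   depending on whether the user went through MatMatTransposeMult() or the MatProduct
   interface:

     -matmattransmult_mpidense_mpidense_via cyclic
     -mat_product_algorithm cyclic

   A minimal programmatic sketch using the MatProduct interface, assuming A and B are
   assembled MPIDENSE matrices:

     Mat C;
     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C,"cyclic"));
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
*/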
2949 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2950 {
2951   Mat_Product    *product = C->product;
2952   const char     *algTypes[2] = {"allgatherv","cyclic"};
2953   PetscInt       alg,nalg = 2;
2954   PetscBool      flg = PETSC_FALSE;
2955 
2956   PetscFunctionBegin;
2957   /* Set default algorithm */
2958   alg = 0; /* default is allgatherv */
2959   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2960   if (flg) {
2961     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2962   }
2963 
2964   /* Get runtime option */
2965   if (product->api_user) {
2966     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2967     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2968     PetscOptionsEnd();
2969   } else {
2970     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2971     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2972     PetscOptionsEnd();
2973   }
2974   if (flg) {
2975     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2976   }
2977 
2978   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2979   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2980   PetscFunctionReturn(0);
2981 }
2982 
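/*
   Top-level product hook for MPIDENSE matrices: installs the symbolic routines for the
   supported product types (AB only when Elemental is available, AtB, and ABt); other
   product types are left untouched here.
*/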
2983 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2984 {
2985   Mat_Product    *product = C->product;
2986 
2987   PetscFunctionBegin;
2988   switch (product->type) {
2989 #if defined(PETSC_HAVE_ELEMENTAL)
2990   case MATPRODUCT_AB:
2991     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2992     break;
2993 #endif
2994   case MATPRODUCT_AtB:
2995     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2996     break;
2997   case MATPRODUCT_ABt:
2998     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2999     break;
3000   default:
3001     break;
3002   }
3003   PetscFunctionReturn(0);
3004 }
3005