xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 40cbb1a031ea8f2be4fe2b92dc842b003ad37be3)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
56   PetscScalar    *v;
57 
58   PetscFunctionBegin;
59   PetscCall(MatDenseGetArray(mat->A,&v));
60   PetscCall(MatDenseGetLDA(mat->A,&lda));
61   rend2 = PetscMin(rend,A->cmap->N);
62   if (rend2>rstart) {
63     for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
64     PetscCall(PetscLogFlops(rend2-rstart));
65   }
66   PetscCall(MatDenseRestoreArray(mat->A,&v));
67   PetscFunctionReturn(0);
68 }
69 
70 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
71 {
72   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
73   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
74 
75   PetscFunctionBegin;
76   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
77   lrow = row - rstart;
78   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
83 {
84   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
85   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
86 
87   PetscFunctionBegin;
88   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
89   lrow = row - rstart;
90   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
91   PetscFunctionReturn(0);
92 }
93 
/* Returns (and caches on A as "DiagonalBlock") a sequential dense matrix aliasing the
   locally owned diagonal block of A.  The returned matrix shares A's storage: no copy. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    /* Point B's storage at column rstart of the local block.
       NOTE(review): offset m*rstart assumes the local leading dimension equals m — confirm
       when a nonstandard LDA has been set. */
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    /* Compose takes a reference; the MatDestroy below only drops our local reference,
       leaving A as the owner of the cached block */
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
124 
/* Inserts/adds a logically dense m x n block of values.  Locally owned rows go straight
   into the local sequential block; off-process entries are stashed for communication
   during assembly (unless stashing is disabled). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global -> local row index */
      if (roworiented) {
        /* v is row-major: row i of the block starts at v + i*n */
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        /* v is column-major: entry (i,j) of the block is v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently skipped */
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: queue it and mark the matrix as needing reassembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
157 
158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
159 {
160   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
161   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
162 
163   PetscFunctionBegin;
164   for (i=0; i<m; i++) {
165     if (idxm[i] < 0) continue; /* negative row */
166     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
167     if (idxm[i] >= rstart && idxm[i] < rend) {
168       row = idxm[i] - rstart;
169       for (j=0; j<n; j++) {
170         if (idxn[j] < 0) continue; /* negative column */
171         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
172         PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
173       }
174     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
180 {
181   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
182 
183   PetscFunctionBegin;
184   PetscCall(MatDenseGetLDA(a->A,lda));
185   PetscFunctionReturn(0);
186 }
187 
188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
189 {
190   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
191   PetscBool      iscuda;
192 
193   PetscFunctionBegin;
194   if (!a->A) {
195     PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
196     PetscCall(PetscLayoutSetUp(A->rmap));
197     PetscCall(PetscLayoutSetUp(A->cmap));
198     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
199     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
200     PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
201     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
202     PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
203   }
204   PetscCall(MatDenseSetLDA(a->A,lda));
205   PetscFunctionReturn(0);
206 }
207 
208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211 
212   PetscFunctionBegin;
213   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
214   PetscCall(MatDenseGetArray(a->A,array));
215   PetscFunctionReturn(0);
216 }
217 
218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
219 {
220   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
221 
222   PetscFunctionBegin;
223   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
224   PetscCall(MatDenseGetArrayRead(a->A,array));
225   PetscFunctionReturn(0);
226 }
227 
228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
229 {
230   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
231 
232   PetscFunctionBegin;
233   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
234   PetscCall(MatDenseGetArrayWrite(a->A,array));
235   PetscFunctionReturn(0);
236 }
237 
238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
244   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
245   PetscCall(MatDensePlaceArray(a->A,array));
246   PetscFunctionReturn(0);
247 }
248 
249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
250 {
251   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
252 
253   PetscFunctionBegin;
254   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
255   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
256   PetscCall(MatDenseResetArray(a->A));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
261 {
262   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
263 
264   PetscFunctionBegin;
265   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
266   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
267   PetscCall(MatDenseReplaceArray(a->A,array));
268   PetscFunctionReturn(0);
269 }
270 
/* Extracts the submatrix A(isrow,iscol) into *B.  All column indices are gathered onto
   every process; only row distributions matching A's own row layout are supported. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs all selected column indices since local blocks store all columns */
  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): bv is advanced linearly, which assumes the new local block has
     leading dimension == nrows (true for a freshly preallocated block) — confirm for
     MAT_REUSE_MATRIX with a nonstandard LDA */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* irow holds global indices; shift to local */
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343 
344   PetscFunctionBegin;
345   PetscCall(MatDenseRestoreArray(a->A,array));
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352 
353   PetscFunctionBegin;
354   PetscCall(MatDenseRestoreArrayRead(a->A,array));
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
359 {
360   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
364   PetscFunctionReturn(0);
365 }
366 
367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
368 {
369   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
370   PetscInt       nstash,reallocs;
371 
372   PetscFunctionBegin;
373   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
374 
375   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
376   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
377   PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
378   PetscFunctionReturn(0);
379 }
380 
/* Completes assembly: drains the stash of off-process values received from other
   processes, inserts them row by row, then assembles the local sequential block. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 once all incoming stash messages have been consumed */
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}
414 
415 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
416 {
417   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
418 
419   PetscFunctionBegin;
420   PetscCall(MatZeroEntries(l->A));
421   PetscFunctionReturn(0);
422 }
423 
424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
425 {
426   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
427   PetscInt          i,len,*lrows;
428 
429   PetscFunctionBegin;
430   /* get locally owned rows */
431   PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
432   /* fix right hand side if needed */
433   if (x && b) {
434     const PetscScalar *xx;
435     PetscScalar       *bb;
436 
437     PetscCall(VecGetArrayRead(x, &xx));
438     PetscCall(VecGetArrayWrite(b, &bb));
439     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
440     PetscCall(VecRestoreArrayRead(x, &xx));
441     PetscCall(VecRestoreArrayWrite(b, &bb));
442   }
443   PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
444   if (diag != 0.0) {
445     Vec d;
446 
447     PetscCall(MatCreateVecs(A,NULL,&d));
448     PetscCall(VecSet(d,diag));
449     PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
450     PetscCall(VecDestroy(&d));
451   }
452   PetscCall(PetscFree(lrows));
453   PetscFunctionReturn(0);
454 }
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
462 {
463   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
464   const PetscScalar *ax;
465   PetscScalar       *ay;
466   PetscMemType      axmtype,aymtype;
467 
468   PetscFunctionBegin;
469   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
470   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
471   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
472   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
473   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
474   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
475   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
476   PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
477   PetscFunctionReturn(0);
478 }
479 
480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
483   const PetscScalar *ax;
484   PetscScalar       *ay;
485   PetscMemType      axmtype,aymtype;
486 
487   PetscFunctionBegin;
488   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
489   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
490   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
491   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
492   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
493   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
494   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
495   PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
496   PetscFunctionReturn(0);
497 }
498 
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
502   const PetscScalar *ax;
503   PetscScalar       *ay;
504   PetscMemType      axmtype,aymtype;
505 
506   PetscFunctionBegin;
507   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
508   PetscCall(VecSet(yy,0.0));
509   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
510   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
511   PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
512   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
513   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
514   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
515   PetscCall(VecRestoreArrayAndMemType(yy,&ay));
516   PetscFunctionReturn(0);
517 }
518 
519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
520 {
521   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
522   const PetscScalar *ax;
523   PetscScalar       *ay;
524   PetscMemType      axmtype,aymtype;
525 
526   PetscFunctionBegin;
527   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
528   PetscCall(VecCopy(yy,zz));
529   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
530   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
531   PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
532   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
533   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
534   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
535   PetscCall(VecRestoreArrayAndMemType(zz,&ay));
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
542   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
543   PetscScalar       *x,zero = 0.0;
544   const PetscScalar *av;
545 
546   PetscFunctionBegin;
547   PetscCall(VecSet(v,zero));
548   PetscCall(VecGetArray(v,&x));
549   PetscCall(VecGetSize(v,&n));
550   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
551   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
552   radd = A->rmap->rstart*m;
553   PetscCall(MatDenseGetArrayRead(a->A,&av));
554   PetscCall(MatDenseGetLDA(a->A,&lda));
555   for (i=0; i<len; i++) {
556     x[i] = av[radd + i*lda + i];
557   }
558   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
559   PetscCall(VecRestoreArray(v,&x));
560   PetscFunctionReturn(0);
561 }
562 
/* Destroys an MPI dense matrix: frees the stash, the local block and work objects,
   then detaches every composed method/function so no dangling callbacks remain. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* refuse to destroy while a column-vector or submatrix view is still checked out */
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* release the local sequential block and communication/work objects */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  /* detach every composed type-specific function installed at creation/convert time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined (PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAReplaceArray_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));

  /* drop the cached diagonal block (see MatGetDiagonalBlock_MPIDense), if any */
  PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL));
  PetscFunctionReturn(0);
}
638 
639 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
640 
#include <petscdraw.h>
/* Views an MPI dense matrix on ASCII, draw, or socket viewers.  INFO formats are
   handled per process; otherwise the whole matrix is gathered onto rank 0 and viewed
   there as a sequential matrix. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary only; no matrix entries are printed */
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns all rows of the gathered copy; all other ranks own none */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    /* each rank pushes its locally owned rows; the stash routes them to rank 0 */
    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /* view the gathered local block on rank 0 only, through a sub-viewer */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
723 
724 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
725 {
726   PetscBool      iascii,isbinary,isdraw,issocket;
727 
728   PetscFunctionBegin;
729   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
730   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
731   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
732   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
733 
734   if (iascii || issocket || isdraw) {
735     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
736   } else if (isbinary) {
737     PetscCall(MatView_Dense_Binary(mat,viewer));
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
743 {
744   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
745   Mat            mdn  = mat->A;
746   PetscLogDouble isend[5],irecv[5];
747 
748   PetscFunctionBegin;
749   info->block_size = 1.0;
750 
751   PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));
752 
753   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
754   isend[3] = info->memory;  isend[4] = info->mallocs;
755   if (flag == MAT_LOCAL) {
756     info->nz_used      = isend[0];
757     info->nz_allocated = isend[1];
758     info->nz_unneeded  = isend[2];
759     info->memory       = isend[3];
760     info->mallocs      = isend[4];
761   } else if (flag == MAT_GLOBAL_MAX) {
762     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
763 
764     info->nz_used      = irecv[0];
765     info->nz_allocated = irecv[1];
766     info->nz_unneeded  = irecv[2];
767     info->memory       = irecv[3];
768     info->mallocs      = irecv[4];
769   } else if (flag == MAT_GLOBAL_SUM) {
770     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
771 
772     info->nz_used      = irecv[0];
773     info->nz_allocated = irecv[1];
774     info->nz_unneeded  = irecv[2];
775     info->memory       = irecv[3];
776     info->mallocs      = irecv[4];
777   }
778   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
779   info->fill_ratio_needed = 0;
780   info->factor_mallocs    = 0;
781   PetscFunctionReturn(0);
782 }
783 
784 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
785 {
786   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
787 
788   PetscFunctionBegin;
789   switch (op) {
790   case MAT_NEW_NONZERO_LOCATIONS:
791   case MAT_NEW_NONZERO_LOCATION_ERR:
792   case MAT_NEW_NONZERO_ALLOCATION_ERR:
793     MatCheckPreallocated(A,1);
794     PetscCall(MatSetOption(a->A,op,flg));
795     break;
796   case MAT_ROW_ORIENTED:
797     MatCheckPreallocated(A,1);
798     a->roworiented = flg;
799     PetscCall(MatSetOption(a->A,op,flg));
800     break;
801   case MAT_FORCE_DIAGONAL_ENTRIES:
802   case MAT_KEEP_NONZERO_PATTERN:
803   case MAT_USE_HASH_TABLE:
804   case MAT_SORTED_FULL:
805     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
806     break;
807   case MAT_IGNORE_OFF_PROC_ENTRIES:
808     a->donotstash = flg;
809     break;
810   case MAT_SYMMETRIC:
811   case MAT_STRUCTURALLY_SYMMETRIC:
812   case MAT_HERMITIAN:
813   case MAT_SYMMETRY_ETERNAL:
814   case MAT_IGNORE_LOWER_TRIANGULAR:
815   case MAT_IGNORE_ZERO_ENTRIES:
816     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
817     break;
818   default:
819     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
825 {
826   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
827   const PetscScalar *l;
828   PetscScalar       x,*v,*vv,*r;
829   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
830 
831   PetscFunctionBegin;
832   PetscCall(MatDenseGetArray(mdn->A,&vv));
833   PetscCall(MatDenseGetLDA(mdn->A,&lda));
834   PetscCall(MatGetLocalSize(A,&s2,&s3));
835   if (ll) {
836     PetscCall(VecGetLocalSize(ll,&s2a));
837     PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
838     PetscCall(VecGetArrayRead(ll,&l));
839     for (i=0; i<m; i++) {
840       x = l[i];
841       v = vv + i;
842       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
843     }
844     PetscCall(VecRestoreArrayRead(ll,&l));
845     PetscCall(PetscLogFlops(1.0*n*m));
846   }
847   if (rr) {
848     const PetscScalar *ar;
849 
850     PetscCall(VecGetLocalSize(rr,&s3a));
851     PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
852     PetscCall(VecGetArrayRead(rr,&ar));
853     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
854     PetscCall(VecGetArray(mdn->lvec,&r));
855     PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
856     PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
857     PetscCall(VecRestoreArrayRead(rr,&ar));
858     for (i=0; i<n; i++) {
859       x = r[i];
860       v = vv + i*lda;
861       for (j=0; j<m; j++) (*v++) *= x;
862     }
863     PetscCall(VecRestoreArray(mdn->lvec,&r));
864     PetscCall(PetscLogFlops(1.0*n*m));
865   }
866   PetscCall(MatDenseRestoreArray(mdn->A,&vv));
867   PetscFunctionReturn(0);
868 }
869 
870 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
871 {
872   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
873   PetscInt          i,j;
874   PetscMPIInt       size;
875   PetscReal         sum = 0.0;
876   const PetscScalar *av,*v;
877 
878   PetscFunctionBegin;
879   PetscCall(MatDenseGetArrayRead(mdn->A,&av));
880   v    = av;
881   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
882   if (size == 1) {
883     PetscCall(MatNorm(mdn->A,type,nrm));
884   } else {
885     if (type == NORM_FROBENIUS) {
886       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
887         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
888       }
889       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
890       *nrm = PetscSqrtReal(*nrm);
891       PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
892     } else if (type == NORM_1) {
893       PetscReal *tmp,*tmp2;
894       PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
895       *nrm = 0.0;
896       v    = av;
897       for (j=0; j<mdn->A->cmap->n; j++) {
898         for (i=0; i<mdn->A->rmap->n; i++) {
899           tmp[j] += PetscAbsScalar(*v);  v++;
900         }
901       }
902       PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
903       for (j=0; j<A->cmap->N; j++) {
904         if (tmp2[j] > *nrm) *nrm = tmp2[j];
905       }
906       PetscCall(PetscFree2(tmp,tmp2));
907       PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
908     } else if (type == NORM_INFINITY) { /* max row norm */
909       PetscReal ntemp;
910       PetscCall(MatNorm(mdn->A,type,&ntemp));
911       PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
912     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
913   }
914   PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
915   PetscFunctionReturn(0);
916 }
917 
/* Form the transpose of an MPIDense matrix by scattering each locally owned
   column of A into the corresponding global row of the result via
   MatSetValues()/MatAssembly. */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  /* INITIAL and INPLACE both build a fresh matrix with the transposed layout;
     REUSE takes the caller-supplied result from *matout */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  /* rwork[] holds the global indices of the rows owned by this process */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  /* local column j (stride lda in the column-major array) becomes the locally
     owned piece of global row j of B */
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold the temporary B back into A's header */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
954 
955 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
956 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
957 
958 PetscErrorCode MatSetUp_MPIDense(Mat A)
959 {
960   PetscFunctionBegin;
961   PetscCall(PetscLayoutSetUp(A->rmap));
962   PetscCall(PetscLayoutSetUp(A->cmap));
963   if (!A->preallocated) {
964     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
970 {
971   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
972 
973   PetscFunctionBegin;
974   PetscCall(MatAXPY(A->A,alpha,B->A,str));
975   PetscFunctionReturn(0);
976 }
977 
978 PetscErrorCode MatConjugate_MPIDense(Mat mat)
979 {
980   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
981 
982   PetscFunctionBegin;
983   PetscCall(MatConjugate(a->A));
984   PetscFunctionReturn(0);
985 }
986 
987 PetscErrorCode MatRealPart_MPIDense(Mat A)
988 {
989   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
990 
991   PetscFunctionBegin;
992   PetscCall(MatRealPart(a->A));
993   PetscFunctionReturn(0);
994 }
995 
996 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
997 {
998   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
999 
1000   PetscFunctionBegin;
1001   PetscCall(MatImaginaryPart(a->A));
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1006 {
1007   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1008 
1009   PetscFunctionBegin;
1010   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1011   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1012   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1017 
1018 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
1019 {
1020   PetscInt       i,m,n;
1021   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1022   PetscReal      *work;
1023 
1024   PetscFunctionBegin;
1025   PetscCall(MatGetSize(A,&m,&n));
1026   PetscCall(PetscMalloc1(n,&work));
1027   if (type == REDUCTION_MEAN_REALPART) {
1028     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
1029   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1030     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
1031   } else {
1032     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
1033   }
1034   if (type == NORM_2) {
1035     for (i=0; i<n; i++) work[i] *= work[i];
1036   }
1037   if (type == NORM_INFINITY) {
1038     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1039   } else {
1040     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1041   }
1042   PetscCall(PetscFree(work));
1043   if (type == NORM_2) {
1044     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1045   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1046     for (i=0; i<n; i++) reductions[i] /= m;
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #if defined(PETSC_HAVE_CUDA)
/* A += alpha*I for a CUDA-backed MPIDense matrix, performed on the device. */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  /* rstart/rend delimit the locally owned diagonal entries; cmap->N caps the
     diagonal length for rectangular matrices */
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}
1065 
/* Expose column col of a CUDA MPIDense matrix as a Vec with read/write
   access.  The cached vector a->cvec is lazily created once and then pointed
   at the requested column; only one column may be outstanding at a time. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created without storage; the array is placed per-column below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;  /* stored +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1085 
1086 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1087 {
1088   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1089 
1090   PetscFunctionBegin;
1091   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1092   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1093   a->vecinuse = 0;
1094   PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1095   PetscCall(VecCUDAResetArray(a->cvec));
1096   *v   = NULL;
1097   PetscFunctionReturn(0);
1098 }
1099 
/* Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA(): additionally
   read-locks the cached vector so callers cannot modify the column. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created without storage; the array is placed per-column below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;  /* stored +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1120 
/* Release a column vector obtained with the read-only getter: restore the
   read-only device array, pop the read lock, and detach the cached vector. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}
1135 
/* Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA(): acquires the
   device array with write-only access (existing values need not be valid). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* created without storage; the array is placed per-column below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;  /* stored +1 so that column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1155 
/* Release a column vector obtained with the write-only getter: return the
   device array with write semantics and detach the cached vector. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}
1169 
1170 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1171 {
1172   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1173 
1174   PetscFunctionBegin;
1175   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1176   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1177   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1182 {
1183   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1184 
1185   PetscFunctionBegin;
1186   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1187   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1188   PetscCall(MatDenseCUDAResetArray(l->A));
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1193 {
1194   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1195 
1196   PetscFunctionBegin;
1197   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1198   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1199   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1204 {
1205   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1206 
1207   PetscFunctionBegin;
1208   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1213 {
1214   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1215 
1216   PetscFunctionBegin;
1217   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1222 {
1223   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1224 
1225   PetscFunctionBegin;
1226   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1231 {
1232   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1233 
1234   PetscFunctionBegin;
1235   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1240 {
1241   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1242 
1243   PetscFunctionBegin;
1244   PetscCall(MatDenseCUDAGetArray(l->A,a));
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1249 {
1250   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1251 
1252   PetscFunctionBegin;
1253   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1258 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1259 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1260 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1261 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1262 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1263 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1264 
/* Pin the matrix to the CPU (bind == PETSC_TRUE) or re-enable GPU execution,
   propagating the binding to the local block and the cached submatrix, and
   swapping the column-vector accessors and the shift op between the host and
   CUDA implementations. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* Drop cached column vector / submatrix if they are of the wrong (host)
       type; they are recreated on demand with the CUDA type.
       NOTE(review): this relies on PetscObjectTypeCompare() tolerating a NULL
       object (returning false) so the destroy is a no-op — confirm. */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    /* install the CUDA column-vector accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift                   = MatShift_MPIDenseCUDA;
  } else {
    /* install the host column-vector accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift                   = MatShift_MPIDense;
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}
1308 
1309 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1310 {
1311   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1312   PetscBool      iscuda;
1313 
1314   PetscFunctionBegin;
1315   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1316   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
1317   if (!iscuda) PetscFunctionReturn(0);
1318   PetscCall(PetscLayoutSetUp(A->rmap));
1319   PetscCall(PetscLayoutSetUp(A->cmap));
1320   if (!d->A) {
1321     PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
1322     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
1323     PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
1324   }
1325   PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
1326   PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
1327   A->preallocated = PETSC_TRUE;
1328   A->assembled = PETSC_TRUE;
1329   PetscFunctionReturn(0);
1330 }
1331 #endif
1332 
1333 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1334 {
1335   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1336 
1337   PetscFunctionBegin;
1338   PetscCall(MatSetRandom(d->A,rctx));
1339   PetscFunctionReturn(0);
1340 }
1341 
/* A dense matrix explicitly stores every entry, so no diagonal entry can be
   missing; d is deliberately left untouched (it is only meaningful when
   *missing is PETSC_TRUE). */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1348 
1349 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1350 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1351 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1352 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1353 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1354 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1355 
1356 /* -------------------------------------------------------------------*/
1357 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1358                                         MatGetRow_MPIDense,
1359                                         MatRestoreRow_MPIDense,
1360                                         MatMult_MPIDense,
1361                                 /*  4*/ MatMultAdd_MPIDense,
1362                                         MatMultTranspose_MPIDense,
1363                                         MatMultTransposeAdd_MPIDense,
1364                                         NULL,
1365                                         NULL,
1366                                         NULL,
1367                                 /* 10*/ NULL,
1368                                         NULL,
1369                                         NULL,
1370                                         NULL,
1371                                         MatTranspose_MPIDense,
1372                                 /* 15*/ MatGetInfo_MPIDense,
1373                                         MatEqual_MPIDense,
1374                                         MatGetDiagonal_MPIDense,
1375                                         MatDiagonalScale_MPIDense,
1376                                         MatNorm_MPIDense,
1377                                 /* 20*/ MatAssemblyBegin_MPIDense,
1378                                         MatAssemblyEnd_MPIDense,
1379                                         MatSetOption_MPIDense,
1380                                         MatZeroEntries_MPIDense,
1381                                 /* 24*/ MatZeroRows_MPIDense,
1382                                         NULL,
1383                                         NULL,
1384                                         NULL,
1385                                         NULL,
1386                                 /* 29*/ MatSetUp_MPIDense,
1387                                         NULL,
1388                                         NULL,
1389                                         MatGetDiagonalBlock_MPIDense,
1390                                         NULL,
1391                                 /* 34*/ MatDuplicate_MPIDense,
1392                                         NULL,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                 /* 39*/ MatAXPY_MPIDense,
1397                                         MatCreateSubMatrices_MPIDense,
1398                                         NULL,
1399                                         MatGetValues_MPIDense,
1400                                         MatCopy_MPIDense,
1401                                 /* 44*/ NULL,
1402                                         MatScale_MPIDense,
1403                                         MatShift_MPIDense,
1404                                         NULL,
1405                                         NULL,
1406                                 /* 49*/ MatSetRandom_MPIDense,
1407                                         NULL,
1408                                         NULL,
1409                                         NULL,
1410                                         NULL,
1411                                 /* 54*/ NULL,
1412                                         NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1417                                         MatDestroy_MPIDense,
1418                                         MatView_MPIDense,
1419                                         NULL,
1420                                         NULL,
1421                                 /* 64*/ NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                 /* 69*/ NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                         NULL,
1431                                 /* 74*/ NULL,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                 /* 79*/ NULL,
1437                                         NULL,
1438                                         NULL,
1439                                         NULL,
1440                                 /* 83*/ MatLoad_MPIDense,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                 /* 89*/ NULL,
1447                                         NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                 /* 94*/ NULL,
1452                                         NULL,
1453                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1454                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1455                                         NULL,
1456                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1457                                         NULL,
1458                                         NULL,
1459                                         MatConjugate_MPIDense,
1460                                         NULL,
1461                                 /*104*/ NULL,
1462                                         MatRealPart_MPIDense,
1463                                         MatImaginaryPart_MPIDense,
1464                                         NULL,
1465                                         NULL,
1466                                 /*109*/ NULL,
1467                                         NULL,
1468                                         NULL,
1469                                         MatGetColumnVector_MPIDense,
1470                                         MatMissingDiagonal_MPIDense,
1471                                 /*114*/ NULL,
1472                                         NULL,
1473                                         NULL,
1474                                         NULL,
1475                                         NULL,
1476                                 /*119*/ NULL,
1477                                         NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         NULL,
1481                                 /*124*/ NULL,
1482                                         MatGetColumnReductions_MPIDense,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                 /*129*/ NULL,
1487                                         NULL,
1488                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1489                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1490                                         NULL,
1491                                 /*134*/ NULL,
1492                                         NULL,
1493                                         NULL,
1494                                         NULL,
1495                                         NULL,
1496                                 /*139*/ NULL,
1497                                         NULL,
1498                                         NULL,
1499                                         NULL,
1500                                         NULL,
1501                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1502                                 /*145*/ NULL,
1503                                         NULL,
1504                                         NULL
1505 };
1506 
/* Type-specific implementation of MatMPIDenseSetPreallocation(): allocates (or
   adopts, when data != NULL) the storage of the inner sequential matrix that
   holds this process's rows. */
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  /* lazily create the inner sequential matrix: it stores the locally owned rows
     across all N global columns */
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  /* keep the inner matrix type in sync with the outer one (CUDA vs host) */
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));   /* data==NULL lets PETSc allocate */
  mat->preallocated = PETSC_TRUE;
  mat->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1528 
1529 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1530 {
1531   Mat            B,C;
1532 
1533   PetscFunctionBegin;
1534   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1535   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1536   PetscCall(MatDestroy(&C));
1537   if (reuse == MAT_REUSE_MATRIX) {
1538     C = *newmat;
1539   } else C = NULL;
1540   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1541   PetscCall(MatDestroy(&B));
1542   if (reuse == MAT_INPLACE_MATRIX) {
1543     PetscCall(MatHeaderReplace(A,&C));
1544   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1549 {
1550   Mat            B,C;
1551 
1552   PetscFunctionBegin;
1553   PetscCall(MatDenseGetLocalMatrix(A,&C));
1554   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1555   if (reuse == MAT_REUSE_MATRIX) {
1556     C = *newmat;
1557   } else C = NULL;
1558   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1559   PetscCall(MatDestroy(&B));
1560   if (reuse == MAT_INPLACE_MATRIX) {
1561     PetscCall(MatHeaderReplace(A,&C));
1562   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1563   PetscFunctionReturn(0);
1564 }
1565 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert an MPIDENSE matrix to Elemental's distribution by inserting this
   process's locally owned rows into a new (or reused) MATELEMENTAL matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* entries are accumulated with ADD_VALUES below, so start from zero */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* the dense array passed to MatSetValues() below is stored by columns */
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  /* index arrays covering all global columns and this process's row range */
  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1605 
/* Give the caller a raw pointer to one column of the local dense storage;
   must be matched by MatDenseRestoreColumn_MPIDense(). */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  /* forward to the sequential inner matrix that holds this process's rows */
  PetscCall(MatDenseGetColumn(mat->A,col,vals));
  PetscFunctionReturn(0);
}
1614 
/* Release column access obtained with MatDenseGetColumn_MPIDense(). */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  /* forward to the sequential inner matrix */
  PetscCall(MatDenseRestoreColumn(mat->A,vals));
  PetscFunctionReturn(0);
}
1623 
1624 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1625 {
1626   Mat_MPIDense   *mat;
1627   PetscInt       m,nloc,N;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(MatGetSize(inmat,&m,&N));
1631   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1632   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1633     PetscInt sum;
1634 
1635     if (n == PETSC_DECIDE) {
1636       PetscCall(PetscSplitOwnership(comm,&n,&N));
1637     }
1638     /* Check sum(n) = N */
1639     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1640     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1641 
1642     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1643     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1644   }
1645 
1646   /* numeric phase */
1647   mat = (Mat_MPIDense*)(*outmat)->data;
1648   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #if defined(PETSC_HAVE_CUDA)
/* Convert MATMPIDENSECUDA back to MATMPIDENSE: strip the CUDA-specific
   composed methods and move the inner sequential matrix to the host. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* NOTE(review): for MAT_INPLACE_MATRIX, *newmat presumably aliases M (set up
     by the MatConvert() front end) — confirm against the caller */

  B    = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  /* host vectors are now the default companions of this matrix */
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  /* remove every CUDA-only composed method */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  /* move the inner sequential matrix (if already created) to the host type */
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1692 
/* Convert MATMPIDENSE to MATMPIDENSECUDA: compose the CUDA-specific methods
   and move the inner sequential matrix to the GPU type. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* NOTE(review): for MAT_INPLACE_MATRIX, *newmat presumably aliases M (set up
     by the MatConvert() front end) — confirm against the caller */

  B    = *newmat;
  /* CUDA vectors become the default companions of this matrix */
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  /* compose the CUDA-specific methods dispatched to by the MatDenseCUDAXXX() front ends */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  /* move the inner sequential matrix (if already created) to the CUDA type */
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1735 #endif
1736 
1737 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1738 {
1739   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1740   PetscInt       lda;
1741 
1742   PetscFunctionBegin;
1743   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1744   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1745   if (!a->cvec) {
1746     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1747   }
1748   a->vecinuse = col + 1;
1749   PetscCall(MatDenseGetLDA(a->A,&lda));
1750   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1751   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1752   *v   = a->cvec;
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1757 {
1758   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1759 
1760   PetscFunctionBegin;
1761   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1762   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1763   a->vecinuse = 0;
1764   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1765   PetscCall(VecResetArray(a->cvec));
1766   *v   = NULL;
1767   PetscFunctionReturn(0);
1768 }
1769 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): the returned vector is
   placed over column `col` and locked against modification. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* lazily create the work vector that aliases one column of the local array */
  if (!a->cvec) {
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
  /* point the vector at column col; lda accounts for possible row padding */
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  /* read-only access: lock the vector so callers cannot modify it */
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1789 
1790 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1791 {
1792   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1793 
1794   PetscFunctionBegin;
1795   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1796   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1797   a->vecinuse = 0;
1798   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1799   PetscCall(VecLockReadPop(a->cvec));
1800   PetscCall(VecResetArray(a->cvec));
1801   *v   = NULL;
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1806 {
1807   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1808   PetscInt       lda;
1809 
1810   PetscFunctionBegin;
1811   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1812   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1813   if (!a->cvec) {
1814     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1815   }
1816   a->vecinuse = col + 1;
1817   PetscCall(MatDenseGetLDA(a->A,&lda));
1818   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1819   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1820   *v   = a->cvec;
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1825 {
1826   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1827 
1828   PetscFunctionBegin;
1829   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1830   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1831   a->vecinuse = 0;
1832   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1833   PetscCall(VecResetArray(a->cvec));
1834   *v   = NULL;
1835   PetscFunctionReturn(0);
1836 }
1837 
/* Hand out a cached matrix that views rows [rbegin,rend) and columns
   [cbegin,cend) of A; only one such view may be outstanding at a time and it
   must be returned with MatDenseRestoreSubMatrix_MPIDense(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm     comm;
  PetscInt     pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range to this process's ownership range,
     expressed in local row indices */
  pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart);
  pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart));
  if (!a->cmat) {
    /* first call: create the cached outer submatrix with the requested layout */
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    /* all rows requested: share A's row layout instead of building a new one */
    if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* reuse the cached submatrix; rebuild its layouts only if the requested
       sizes changed since the previous call */
    PetscBool same = (PetscBool)(rend-rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* same global row count, but the local split may differ across ranks */
      same = (PetscBool)(pend-pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend-cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the inner sequential matrix provides the actual view of the local data */
  PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;  /* non-zero marks the view as outstanding */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1892 
1893 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1894 {
1895   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1896   Mat_MPIDense *c;
1897 
1898   PetscFunctionBegin;
1899   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1900   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1901   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1902   a->matinuse = 0;
1903   c = (Mat_MPIDense*)a->cmat->data;
1904   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1905   *v = NULL;
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 /*MC
1910    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1911 
1912    Options Database Keys:
1913 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1914 
1915   Level: beginner
1916 
1917 .seealso: `MatCreateDense()`
1918 
1919 M*/
/* Constructor for MATMPIDENSE: installs the ops table and composes the
   type-specific methods dispatched to by the MatDenseXXX() front ends. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific methods used by the MatDenseXXX() front ends */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  /* MatProduct support for combinations with AIJ matrices */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}
1984 
1985 /*MC
1986    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1987 
1988    Options Database Keys:
1989 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1990 
1991   Level: beginner
1992 
.seealso: `MATMPIDENSE`, `MATSEQDENSECUDA`, `MATDENSECUDA`
1994 
1995 M*/
1996 #if defined(PETSC_HAVE_CUDA)
1997 #include <petsc/private/deviceimpl.h>
/* Constructor for MATMPIDENSECUDA: builds a host mpidense matrix and then
   converts it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  /* make sure the CUDA device is initialized before any GPU work happens */
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
2006 #endif
2007 
2008 /*MC
2009    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
2010 
2011    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
2012    and MATMPIDENSE otherwise.
2013 
2014    Options Database Keys:
2015 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2016 
2017   Level: beginner
2018 
2019 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
2020 M*/
2021 
2022 /*MC
2023    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2024 
2025    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2026    and MATMPIDENSECUDA otherwise.
2027 
2028    Options Database Keys:
2029 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2030 
2031   Level: beginner
2032 
2033 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
2034 M*/
2035 
2036 /*@C
2037    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2038 
2039    Collective
2040 
2041    Input Parameters:
+  B - the matrix
2043 -  data - optional location of matrix data.  Set data=NULL for PETSc
2044    to control all matrix memory allocation.
2045 
2046    Notes:
2047    The dense format is fully compatible with standard Fortran 77
2048    storage by columns.
2049 
2050    The data input variable is intended primarily for Fortran programmers
2051    who wish to allocate their own matrix memory space.  Most users should
2052    set data=NULL.
2053 
2054    Level: intermediate
2055 
2056 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2057 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; no-op for other matrix types */
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}
2065 
2066 /*@
2067    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2068    array provided by the user. This is useful to avoid copying an array
2069    into a matrix
2070 
2071    Not Collective
2072 
2073    Input Parameters:
2074 +  mat - the matrix
2075 -  array - the array in column major order
2076 
2077    Notes:
2078    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2079    freed when the matrix is destroyed.
2080 
2081    Level: developer
2082 
2083 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2084 
2085 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  /* the matrix values have (potentially) changed; invalidate cached state */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* assumes the placed array is host memory, so the CPU copy is authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2097 
2098 /*@
2099    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2100 
2101    Not Collective
2102 
2103    Input Parameters:
2104 .  mat - the matrix
2105 
2106    Notes:
2107    You can only call this after a call to MatDensePlaceArray()
2108 
2109    Level: developer
2110 
2111 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2112 
2113 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* restore the array saved by a prior MatDensePlaceArray() call */
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  /* the visible values (may) change when the original array comes back */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2122 
2123 /*@
2124    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2125    array provided by the user. This is useful to avoid copying an array
2126    into a matrix
2127 
2128    Not Collective
2129 
2130    Input Parameters:
2131 +  mat - the matrix
2132 -  array - the array in column major order
2133 
2134    Notes:
2135    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2136    freed by the user. It will be freed when the matrix is destroyed.
2137 
2138    Level: developer
2139 
2140 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2141 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* permanently replace the backing array; ownership transfers to the matrix */
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array is host memory, so the CPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2153 
2154 #if defined(PETSC_HAVE_CUDA)
2155 /*@C
2156    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2157    array provided by the user. This is useful to avoid copying an array
2158    into a matrix
2159 
2160    Not Collective
2161 
2162    Input Parameters:
2163 +  mat - the matrix
2164 -  array - the array in column major order
2165 
2166    Notes:
2167    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2168    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2169 
2170    Level: developer
2171 
2172 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2173 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array is device memory, so the GPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2183 
2184 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2186 
2187    Not Collective
2188 
2189    Input Parameters:
2190 .  mat - the matrix
2191 
2192    Notes:
2193    You can only call this after a call to MatDenseCUDAPlaceArray()
2194 
2195    Level: developer
2196 
2197 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2198 
2199 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* restore the device array saved by a prior MatDenseCUDAPlaceArray() call */
  PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
  /* the visible values (may) change when the original array comes back */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2208 
2209 /*@C
2210    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2211    array provided by the user. This is useful to avoid copying an array
2212    into a matrix
2213 
2214    Not Collective
2215 
2216    Input Parameters:
2217 +  mat - the matrix
2218 -  array - the array in column major order
2219 
2220    Notes:
2221    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2222    The memory passed in CANNOT be freed by the user. It will be freed
2223    when the matrix is destroyed. The array should respect the matrix leading dimension.
2224 
2225    Level: developer
2226 
2227 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2228 @*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* permanently replace the device array; ownership transfers to the matrix */
  PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array is device memory, so the GPU copy becomes the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2238 
2239 /*@C
2240    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2241 
2242    Not Collective
2243 
2244    Input Parameters:
2245 .  A - the matrix
2246 
   Output Parameter:
2248 .  array - the GPU array in column major order
2249 
2250    Notes:
2251    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2252 
2253    Level: developer
2254 
2255 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2256 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
  /* caller intends to overwrite the values, so bump the object state now */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2265 
2266 /*@C
2267    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2268 
2269    Not Collective
2270 
2271    Input Parameters:
2272 +  A - the matrix
2273 -  array - the GPU array in column major order
2274 
2275    Notes:
2276 
2277    Level: developer
2278 
2279 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2280 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the write happened on the device, so the GPU copy is now the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2290 
2291 /*@C
2292    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2293 
2294    Not Collective
2295 
2296    Input Parameters:
2297 .  A - the matrix
2298 
   Output Parameter:
2300 .  array - the GPU array in column major order
2301 
2302    Notes:
2303    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2304 
2305    Level: developer
2306 
2307 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2308 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no PetscObjectStateIncrease(), values are unchanged */
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  PetscFunctionReturn(0);
}
2316 
2317 /*@C
2318    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2319 
2320    Not Collective
2321 
2322    Input Parameters:
2323 +  A - the matrix
2324 -  array - the GPU array in column major order
2325 
2326    Notes:
2327    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2328 
2329    Level: developer
2330 
2331 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2332 @*/
2333 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2334 {
2335   PetscFunctionBegin;
2336   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 /*@C
2341    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2342 
2343    Not Collective
2344 
2345    Input Parameters:
2346 .  A - the matrix
2347 
   Output Parameter:
2349 .  array - the GPU array in column major order
2350 
2351    Notes:
2352    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2353 
2354    Level: developer
2355 
2356 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2357 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read/write access: state is bumped because the caller may modify values */
  PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2366 
2367 /*@C
2368    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2369 
2370    Not Collective
2371 
2372    Input Parameters:
2373 +  A - the matrix
2374 -  array - the GPU array in column major order
2375 
2376    Notes:
2377 
2378    Level: developer
2379 
2380 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2381 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* the (possible) writes happened on the device, so the GPU copy is current */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2391 #endif
2392 
2393 /*@C
2394    MatCreateDense - Creates a matrix in dense format.
2395 
2396    Collective
2397 
2398    Input Parameters:
2399 +  comm - MPI communicator
2400 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2401 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2402 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2403 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2404 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2405    to control all matrix memory allocation.
2406 
2407    Output Parameter:
2408 .  A - the matrix
2409 
2410    Notes:
2411    The dense format is fully compatible with standard Fortran 77
2412    storage by columns.
2413    Note that, although local portions of the matrix are stored in column-major
2414    order, the matrix is partitioned across MPI ranks by row.
2415 
2416    The data input variable is intended primarily for Fortran programmers
2417    who wish to allocate their own matrix memory space.  Most users should
2418    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2419 
2420    The user MUST specify either the local or global matrix dimensions
2421    (possibly both).
2422 
2423    Level: intermediate
2424 
2425 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2426 @*/
2427 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2428 {
2429   PetscFunctionBegin;
2430   PetscCall(MatCreate(comm,A));
2431   PetscCall(MatSetSizes(*A,m,n,M,N));
2432   PetscCall(MatSetType(*A,MATDENSE));
2433   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2434   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 #if defined(PETSC_HAVE_CUDA)
2439 /*@C
2440    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2441 
2442    Collective
2443 
2444    Input Parameters:
2445 +  comm - MPI communicator
2446 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2447 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2448 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2449 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2450 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2451    to control matrix memory allocation.
2452 
2453    Output Parameter:
2454 .  A - the matrix
2455 
2456    Notes:
2457 
2458    Level: intermediate
2459 
2460 .seealso: `MatCreate()`, `MatCreateDense()`
2461 @*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  /* data must be NULL on all ranks or non-NULL on all ranks */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCall(MatSetType(*A,MATDENSECUDA));
  /* only one of the two preallocations takes effect, depending on the
     communicator size; the other is a no-op */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
2473 #endif
2474 
2475 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2476 {
2477   Mat            mat;
2478   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2479 
2480   PetscFunctionBegin;
2481   *newmat = NULL;
2482   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2483   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2484   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2485   a       = (Mat_MPIDense*)mat->data;
2486 
2487   mat->factortype   = A->factortype;
2488   mat->assembled    = PETSC_TRUE;
2489   mat->preallocated = PETSC_TRUE;
2490 
2491   mat->insertmode = NOT_SET_VALUES;
2492   a->donotstash   = oldmat->donotstash;
2493 
2494   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2495   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2496 
2497   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2498   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2499 
2500   *newmat = mat;
2501   PetscFunctionReturn(0);
2502 }
2503 
/* Load an MPIDENSE matrix from a viewer; only binary and (optionally) HDF5
   viewers are supported. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
#endif
  /* dispatch on the viewer type; any other type is an error */
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2529 
2530 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2531 {
2532   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2533   Mat            a,b;
2534 
2535   PetscFunctionBegin;
2536   a    = matA->A;
2537   b    = matB->A;
2538   PetscCall(MatEqual(a,b,flag));
2539   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2544 {
2545   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2546 
2547   PetscFunctionBegin;
2548   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2549   PetscCall(MatDestroy(&atb->atb));
2550   PetscCall(PetscFree(atb));
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2555 {
2556   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2557 
2558   PetscFunctionBegin;
2559   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2560   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2561   PetscCall(PetscFree(abt));
2562   PetscFunctionReturn(0);
2563 }
2564 
/* Numeric phase of C = A^T * B for MPIDENSE matrices: each rank forms the full
   (global-rows x cN) product of its local blocks, then a reduce-scatter sums the
   contributions and leaves each rank with its own rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* buffers and counts were sized in the symbolic phase */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf: for each destination rank, pack that rank's
     rows column by column so the reduce-scatter delivers contiguous segments */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2609 
/* Symbolic phase of C = A^T * B for MPIDENSE matrices: set up C's sizes/type and
   allocate the send buffer and reduce-scatter counts reused by every numeric call. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  /* A^T*B requires A and B to own the same rows on every rank */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C: sendbuf holds the full cM x cN local
     product, recvcounts[i] is the number of entries rank i keeps after the
     reduce-scatter (its row count times cN) */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&atb));
  cM   = C->rmap->N;
  PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C,&ranges));
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2648 
/* Symbolic phase of C = A * B^T for MPIDENSE matrices: choose between the
   "allgatherv" (alg 0) and "cyclic" (alg 1) algorithms and allocate the buffers
   each one needs for the numeric phase. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  /* alg 0 = allgatherv, anything else = cyclic */
  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a fresh tag keeps the cyclic algorithm's messages from colliding with other traffic */
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2700 
/* Numeric phase of C = A * B^T using a ring (cyclic) pipeline: each rank's
   transposed local rows of B circulate around the communicator; in each of the
   `size` steps a rank multiplies its local A block with the block currently in
   hand while the next block is in flight. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  /* leading dimensions of the three local blocks, as BLAS integers */
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn   = B->rmap->n;
  /* when B's local block is contiguous (lda == rows) it can be sent as-is;
     otherwise compact it into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send "left", receive from "right" */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T, written into the column range of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2785 
2786 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2787 {
2788   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2789   Mat_MatTransMultDense *abt;
2790   MPI_Comm              comm;
2791   PetscMPIInt           size;
2792   PetscScalar           *cv, *sendbuf, *recvbuf;
2793   const PetscScalar     *av,*bv;
2794   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2795   PetscScalar           _DOne=1.0,_DZero=0.0;
2796   PetscBLASInt          cm, cn, ck, alda, clda;
2797 
2798   PetscFunctionBegin;
2799   MatCheckProduct(C,3);
2800   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2801   abt  = (Mat_MatTransMultDense*)C->product->data;
2802   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2803   PetscCallMPI(MPI_Comm_size(comm,&size));
2804   PetscCall(MatDenseGetArrayRead(a->A,&av));
2805   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2806   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2807   PetscCall(MatDenseGetLDA(a->A,&i));
2808   PetscCall(PetscBLASIntCast(i,&alda));
2809   PetscCall(MatDenseGetLDA(b->A,&blda));
2810   PetscCall(MatDenseGetLDA(c->A,&i));
2811   PetscCall(PetscBLASIntCast(i,&clda));
2812   /* copy transpose of B into buf[0] */
2813   bn      = B->rmap->n;
2814   sendbuf = abt->buf[0];
2815   recvbuf = abt->buf[1];
2816   for (k = 0, j = 0; j < bn; j++) {
2817     for (i = 0; i < cK; i++, k++) {
2818       sendbuf[k] = bv[i * blda + j];
2819     }
2820   }
2821   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2822   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2823   PetscCall(PetscBLASIntCast(cK,&ck));
2824   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2825   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2826   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2827   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2828   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2829   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2830   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2831   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2836 {
2837   Mat_MatTransMultDense *abt;
2838 
2839   PetscFunctionBegin;
2840   MatCheckProduct(C,3);
2841   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2842   abt = (Mat_MatTransMultDense*)C->product->data;
2843   switch (abt->alg) {
2844   case 1:
2845     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2846     break;
2847   default:
2848     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2849     break;
2850   }
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2855 {
2856   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2857 
2858   PetscFunctionBegin;
2859   PetscCall(MatDestroy(&ab->Ce));
2860   PetscCall(MatDestroy(&ab->Ae));
2861   PetscCall(MatDestroy(&ab->Be));
2862   PetscCall(PetscFree(ab));
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B via Elemental: refresh the Elemental copies of A
   and B, multiply there, and convert the result back into the MPIDENSE C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* reuse the Elemental matrices created in the symbolic phase */
  PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
  PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
  PetscFunctionReturn(0);
}
2881 
/* Symbolic phase of C = A * B via Elemental: create Elemental mirrors Ae/Be,
   form the symbolic Elemental product Ce, size C, and cache all three for reuse. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: A's column layout must match B's row layout */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be; column-oriented insertion matches the
     dense column-major storage */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(Ae,MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
  PetscCall(MatSetType(Be,MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));

  /* setup C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reuse Cdense */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data    = ab;
  C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2928 #endif
2929 /* ----------------------------------------------- */
2930 #if defined(PETSC_HAVE_ELEMENTAL)
2931 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2932 {
2933   PetscFunctionBegin;
2934   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2935   C->ops->productsymbolic = MatProductSymbolic_AB;
2936   PetscFunctionReturn(0);
2937 }
2938 #endif
2939 
2940 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2941 {
2942   Mat_Product *product = C->product;
2943   Mat         A = product->A,B=product->B;
2944 
2945   PetscFunctionBegin;
2946   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2947     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2948   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2949   C->ops->productsymbolic = MatProductSymbolic_AtB;
2950   PetscFunctionReturn(0);
2951 }
2952 
/* Configure C for the ABt product type (C = A * B^T): select one of the two
   communication algorithms ("allgatherv" or "cyclic") from the product's
   algorithm setting and/or runtime options, then install the symbolic
   kernels. Left byte-identical apart from comments: the PetscOptionsBegin/
   PetscOptionsEnd macro pairs open and close a scope, so the structure is
   deliberately untouched. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm: if the product still has the placeholder
     "default" algorithm, replace it with "allgatherv" (index 0) */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg,"default",&flg));
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  /* Get runtime option: the option name depends on whether the product was
     requested through the legacy MatMatTransposeMult() API or the newer
     MatProduct API */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  }
  /* flg is set only if the user supplied the option; alg then holds the
     index of the chosen algorithm */
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2986 
2987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2988 {
2989   Mat_Product    *product = C->product;
2990 
2991   PetscFunctionBegin;
2992   switch (product->type) {
2993 #if defined(PETSC_HAVE_ELEMENTAL)
2994   case MATPRODUCT_AB:
2995     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2996     break;
2997 #endif
2998   case MATPRODUCT_AtB:
2999     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
3000     break;
3001   case MATPRODUCT_ABt:
3002     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
3003     break;
3004   default:
3005     break;
3006   }
3007   PetscFunctionReturn(0);
3008 }
3009