xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 6ffe77eaecce1557e50d00ca5347a7f48e598865)
1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
13               matrix that stores the locally owned part of the operator. For a sequential matrix it returns the matrix itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
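
/* A minimal usage sketch for MatDenseGetLocalMatrix(), assuming A is an assembled
   MATMPIDENSE (or MATSEQDENSE) matrix created elsewhere; the returned Mat is a reference
   to the inner sequential matrix (or to A itself for MATSEQDENSE) and should not be
   destroyed by the caller:

     Mat Aloc;
     PetscCall(MatDenseGetLocalMatrix(A,&Aloc));
     PetscCall(MatView(Aloc,PETSC_VIEWER_STDOUT_SELF));
*/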
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
56   PetscScalar    *v;
57 
58   PetscFunctionBegin;
59   PetscCall(MatDenseGetArray(mat->A,&v));
60   PetscCall(MatDenseGetLDA(mat->A,&lda));
61   rend2 = PetscMin(rend,A->cmap->N);
62   if (rend2>rstart) {
63     for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
64     PetscCall(PetscLogFlops(rend2-rstart));
65   }
66   PetscCall(MatDenseRestoreArray(mat->A,&v));
67   PetscFunctionReturn(0);
68 }
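
/* The loop above relies on the column-major storage of the local block: the locally owned
   part of global column j starts at v + j*lda, so the diagonal entry (j,j) with
   rstart <= j < rend sits at v[(j-rstart) + j*lda].  A hedged usage sketch, assuming A is
   an assembled square MATMPIDENSE matrix:

     PetscCall(MatShift(A,2.0));    adds 2.0 to every diagonal entry of A
*/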
69 
70 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
71 {
72   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
73   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
74 
75   PetscFunctionBegin;
76   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
77   lrow = row - rstart;
78   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
83 {
84   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
85   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
86 
87   PetscFunctionBegin;
88   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
89   lrow = row - rstart;
90   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
91   PetscFunctionReturn(0);
92 }
93 
94 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
95 {
96   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
97   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
98   PetscScalar    *array;
99   MPI_Comm       comm;
100   PetscBool      flg;
101   Mat            B;
102 
103   PetscFunctionBegin;
104   PetscCall(MatHasCongruentLayouts(A,&flg));
105   PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
106   PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
107   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
108 
109     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
110     PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
111     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
112     PetscCall(MatCreate(comm,&B));
113     PetscCall(MatSetSizes(B,m,m,m,m));
114     PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
115     PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
116     PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
117     PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
118     PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
119     *a   = B;
120     PetscCall(MatDestroy(&B));
121   } else *a = B;
122   PetscFunctionReturn(0);
123 }
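
/* A hedged usage sketch for MatGetDiagonalBlock(), assuming A is an assembled MATMPIDENSE
   matrix with congruent row and column layouts; the block shares the storage of the local
   part of A and is kept alive by being composed with A, so the caller does not destroy it:

     Mat D;
     PetscCall(MatGetDiagonalBlock(A,&D));
     PetscCall(MatView(D,PETSC_VIEWER_STDOUT_SELF));
*/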
124 
125 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
126 {
127   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
128   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
129   PetscBool      roworiented = A->roworiented;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (idxm[i] < 0) continue;
134     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
135     if (idxm[i] >= rstart && idxm[i] < rend) {
136       row = idxm[i] - rstart;
137       if (roworiented) {
138         PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
139       } else {
140         for (j=0; j<n; j++) {
141           if (idxn[j] < 0) continue;
142           PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
143           PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
144         }
145       }
146     } else if (!A->donotstash) {
147       mat->assembled = PETSC_FALSE;
148       if (roworiented) {
149         PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
150       } else {
151         PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
152       }
153     }
154   }
155   PetscFunctionReturn(0);
156 }
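
/* Entries whose global row is not owned by this rank are collected in mat->stash and only
   communicated during assembly.  A minimal usage sketch, assuming A is a MATMPIDENSE matrix
   that has already been preallocated:

     PetscInt    row = 0, col = 0;                          may be owned by another rank
     PetscScalar val = 1.0;
     PetscCall(MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));     sends the stashed entries
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));       receives and inserts them
*/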
157 
158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
159 {
160   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
161   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
162 
163   PetscFunctionBegin;
164   for (i=0; i<m; i++) {
165     if (idxm[i] < 0) continue; /* negative row */
166     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
167     if (idxm[i] >= rstart && idxm[i] < rend) {
168       row = idxm[i] - rstart;
169       for (j=0; j<n; j++) {
170         if (idxn[j] < 0) continue; /* negative column */
171         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
172         PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
173       }
174     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
175   }
176   PetscFunctionReturn(0);
177 }
178 
179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
180 {
181   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
182 
183   PetscFunctionBegin;
184   PetscCall(MatDenseGetLDA(a->A,lda));
185   PetscFunctionReturn(0);
186 }
187 
188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
189 {
190   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
191   PetscBool      iscuda;
192 
193   PetscFunctionBegin;
194   if (!a->A) {
195     PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
196     PetscCall(PetscLayoutSetUp(A->rmap));
197     PetscCall(PetscLayoutSetUp(A->cmap));
198     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
199     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
200     PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
201     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
202     PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
203   }
204   PetscCall(MatDenseSetLDA(a->A,lda));
205   PetscFunctionReturn(0);
206 }
207 
208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211 
212   PetscFunctionBegin;
213   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
214   PetscCall(MatDenseGetArray(a->A,array));
215   PetscFunctionReturn(0);
216 }
217 
218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
219 {
220   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
221 
222   PetscFunctionBegin;
223   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
224   PetscCall(MatDenseGetArrayRead(a->A,array));
225   PetscFunctionReturn(0);
226 }
227 
228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
229 {
230   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
231 
232   PetscFunctionBegin;
233   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
234   PetscCall(MatDenseGetArrayWrite(a->A,array));
235   PetscFunctionReturn(0);
236 }
237 
238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
244   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
245   PetscCall(MatDensePlaceArray(a->A,array));
246   PetscFunctionReturn(0);
247 }
248 
249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
250 {
251   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
252 
253   PetscFunctionBegin;
254   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
255   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
256   PetscCall(MatDenseResetArray(a->A));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
261 {
262   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
263 
264   PetscFunctionBegin;
265   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
266   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
267   PetscCall(MatDenseReplaceArray(a->A,array));
268   PetscFunctionReturn(0);
269 }
270 
271 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
272 {
273   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
274   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
275   const PetscInt    *irow,*icol;
276   const PetscScalar *v;
277   PetscScalar       *bv;
278   Mat               newmat;
279   IS                iscol_local;
280   MPI_Comm          comm_is,comm_mat;
281 
282   PetscFunctionBegin;
283   PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
284   PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
285   PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
286 
287   PetscCall(ISAllGather(iscol,&iscol_local));
288   PetscCall(ISGetIndices(isrow,&irow));
289   PetscCall(ISGetIndices(iscol_local,&icol));
290   PetscCall(ISGetLocalSize(isrow,&nrows));
291   PetscCall(ISGetLocalSize(iscol,&ncols));
292   PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */
293 
294   /* No parallel redistribution currently supported! Should really check each index set
295      to confirm that it is OK.  ... Currently supports only submatrices with the same partitioning as the
296      original matrix! */
297 
298   PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
299   PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
300 
301   /* Check submatrix call */
302   if (scall == MAT_REUSE_MATRIX) {
303     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
304     /* Really need to test row and column sizes! */
305     newmat = *B;
306   } else {
307     /* Create and fill new matrix */
308     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
309     PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
310     PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
311     PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
312   }
313 
314   /* Now extract the data pointers and do the copy, column at a time */
315   newmatd = (Mat_MPIDense*)newmat->data;
316   PetscCall(MatDenseGetArray(newmatd->A,&bv));
317   PetscCall(MatDenseGetArrayRead(mat->A,&v));
318   PetscCall(MatDenseGetLDA(mat->A,&lda));
319   for (i=0; i<Ncols; i++) {
320     const PetscScalar *av = v + lda*icol[i];
321     for (j=0; j<nrows; j++) {
322       *bv++ = av[irow[j] - rstart];
323     }
324   }
325   PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
326   PetscCall(MatDenseRestoreArray(newmatd->A,&bv));
327 
328   /* Assemble the matrices so that the correct flags are set */
329   PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
330   PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
331 
332   /* Free work space */
333   PetscCall(ISRestoreIndices(isrow,&irow));
334   PetscCall(ISRestoreIndices(iscol_local,&icol));
335   PetscCall(ISDestroy(&iscol_local));
336   *B   = newmat;
337   PetscFunctionReturn(0);
338 }
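
/* Since no parallel redistribution is performed, the row index set on each rank may only
   contain rows owned by that rank, and the result follows the partitioning of the index
   sets.  A hedged usage sketch that extracts the whole matrix, assuming A is an assembled
   MATMPIDENSE matrix:

     IS       rows,cols;
     Mat      Asub;
     PetscInt rstart,rend,cstart,cend;
     PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
     PetscCall(MatGetOwnershipRangeColumn(A,&cstart,&cend));
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rows));
     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&cols));
     PetscCall(MatCreateSubMatrix(A,rows,cols,MAT_INITIAL_MATRIX,&Asub));
     PetscCall(ISDestroy(&rows));
     PetscCall(ISDestroy(&cols));
     PetscCall(MatDestroy(&Asub));
*/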
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343 
344   PetscFunctionBegin;
345   PetscCall(MatDenseRestoreArray(a->A,array));
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352 
353   PetscFunctionBegin;
354   PetscCall(MatDenseRestoreArrayRead(a->A,array));
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
359 {
360   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
364   PetscFunctionReturn(0);
365 }
366 
367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
368 {
369   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
370   PetscInt       nstash,reallocs;
371 
372   PetscFunctionBegin;
373   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
374 
375   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
376   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
377   PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
378   PetscFunctionReturn(0);
379 }
380 
381 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
382 {
383   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
384   PetscInt       i,*row,*col,flg,j,rstart,ncols;
385   PetscMPIInt    n;
386   PetscScalar    *val;
387 
388   PetscFunctionBegin;
389   if (!mdn->donotstash && !mat->nooffprocentries) {
390     /*  wait on receives */
391     while (1) {
392       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
393       if (!flg) break;
394 
395       for (i=0; i<n;) {
396         /* Now identify the consecutive vals belonging to the same row */
397         for (j=i,rstart=row[j]; j<n; j++) {
398           if (row[j] != rstart) break;
399         }
400         if (j < n) ncols = j-i;
401         else       ncols = n-i;
402         /* Now assemble all these values with a single function call */
403         PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
404         i    = j;
405       }
406     }
407     PetscCall(MatStashScatterEnd_Private(&mat->stash));
408   }
409 
410   PetscCall(MatAssemblyBegin(mdn->A,mode));
411   PetscCall(MatAssemblyEnd(mdn->A,mode));
412   PetscFunctionReturn(0);
413 }
414 
415 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
416 {
417   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
418 
419   PetscFunctionBegin;
420   PetscCall(MatZeroEntries(l->A));
421   PetscFunctionReturn(0);
422 }
423 
424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
425 {
426   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
427   PetscInt          i,len,*lrows;
428 
429   PetscFunctionBegin;
430   /* get locally owned rows */
431   PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
432   /* fix right hand side if needed */
433   if (x && b) {
434     const PetscScalar *xx;
435     PetscScalar       *bb;
436 
437     PetscCall(VecGetArrayRead(x, &xx));
438     PetscCall(VecGetArrayWrite(b, &bb));
439     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
440     PetscCall(VecRestoreArrayRead(x, &xx));
441     PetscCall(VecRestoreArrayWrite(b, &bb));
442   }
443   PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
444   if (diag != 0.0) {
445     Vec d;
446 
447     PetscCall(MatCreateVecs(A,NULL,&d));
448     PetscCall(VecSet(d,diag));
449     PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
450     PetscCall(VecDestroy(&d));
451   }
452   PetscCall(PetscFree(lrows));
453   PetscFunctionReturn(0);
454 }
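
/* A hedged usage sketch for MatZeroRows() on a MATMPIDENSE matrix, assuming A, the solution
   vector x and the right-hand side b were created elsewhere; each rank lists the global rows
   it wants zeroed, a value of diag is placed on the diagonal, and b is set so that those rows
   of the solution equal the corresponding entries of x:

     PetscInt rows[] = {0,1};
     PetscCall(MatZeroRows(A,2,rows,1.0,x,b));
*/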
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
462 {
463   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
464   const PetscScalar *ax;
465   PetscScalar       *ay;
466   PetscMemType      axmtype,aymtype;
467 
468   PetscFunctionBegin;
469   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
470   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
471   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
472   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
473   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
474   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
475   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
476   PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
477   PetscFunctionReturn(0);
478 }
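
/* MatMult first broadcasts the needed entries of the distributed input vector into the
   sequential work vector lvec through the PetscSF Mvctx and then applies the local
   m-by-N dense block.  A minimal usage sketch, assuming A is an assembled MATMPIDENSE matrix:

     Vec x,y;
     PetscCall(MatCreateVecs(A,&x,&y));
     PetscCall(VecSet(x,1.0));
     PetscCall(MatMult(A,x,y));
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
*/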
479 
480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
481 {
482   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
483   const PetscScalar *ax;
484   PetscScalar       *ay;
485   PetscMemType      axmtype,aymtype;
486 
487   PetscFunctionBegin;
488   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
489   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
490   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
491   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
492   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
493   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
494   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
495   PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
496   PetscFunctionReturn(0);
497 }
498 
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
502   const PetscScalar *ax;
503   PetscScalar       *ay;
504   PetscMemType      axmtype,aymtype;
505 
506   PetscFunctionBegin;
507   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
508   PetscCall(VecSet(yy,0.0));
509   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
510   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
511   PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
512   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
513   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
514   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
515   PetscCall(VecRestoreArrayAndMemType(yy,&ay));
516   PetscFunctionReturn(0);
517 }
518 
519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
520 {
521   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
522   const PetscScalar *ax;
523   PetscScalar       *ay;
524   PetscMemType      axmtype,aymtype;
525 
526   PetscFunctionBegin;
527   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
528   PetscCall(VecCopy(yy,zz));
529   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
530   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
531   PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
532   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
533   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
534   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
535   PetscCall(VecRestoreArrayAndMemType(zz,&ay));
536   PetscFunctionReturn(0);
537 }
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
542   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
543   PetscScalar       *x,zero = 0.0;
544   const PetscScalar *av;
545 
546   PetscFunctionBegin;
547   PetscCall(VecSet(v,zero));
548   PetscCall(VecGetArray(v,&x));
549   PetscCall(VecGetSize(v,&n));
550   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
551   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
552   radd = A->rmap->rstart*m;
553   PetscCall(MatDenseGetArrayRead(a->A,&av));
554   PetscCall(MatDenseGetLDA(a->A,&lda));
555   for (i=0; i<len; i++) {
556     x[i] = av[radd + i*lda + i];
557   }
558   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
559   PetscCall(VecRestoreArray(v,&x));
560   PetscFunctionReturn(0);
561 }
562 
563 PetscErrorCode MatDestroy_MPIDense(Mat mat)
564 {
565   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
566 
567   PetscFunctionBegin;
568 #if defined(PETSC_USE_LOG)
569   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
570 #endif
571   PetscCall(MatStashDestroy_Private(&mat->stash));
572   PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
573   PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
574   PetscCall(MatDestroy(&mdn->A));
575   PetscCall(VecDestroy(&mdn->lvec));
576   PetscCall(PetscSFDestroy(&mdn->Mvctx));
577   PetscCall(VecDestroy(&mdn->cvec));
578   PetscCall(MatDestroy(&mdn->cmat));
579 
580   PetscCall(PetscFree(mat->data));
581   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
582 
583   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
584   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
585   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
586   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
587   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
588   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
589   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
590   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
591   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
592   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
593   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
594   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
596 #if defined(PETSC_HAVE_ELEMENTAL)
597   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
598 #endif
599 #if defined(PETSC_HAVE_SCALAPACK)
600   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
601 #endif
602   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
603   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
604   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
605 #if defined (PETSC_HAVE_CUDA)
606   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
607   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
608 #endif
609   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
610   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
611   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
612   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
613   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
614   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
615   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
616   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
617   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
618   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
619   PetscFunctionReturn(0);
620 }
621 
622 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
623 
624 #include <petscdraw.h>
625 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
626 {
627   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
628   PetscMPIInt       rank;
629   PetscViewerType   vtype;
630   PetscBool         iascii,isdraw;
631   PetscViewer       sviewer;
632   PetscViewerFormat format;
633 
634   PetscFunctionBegin;
635   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
636   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
637   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
638   if (iascii) {
639     PetscCall(PetscViewerGetType(viewer,&vtype));
640     PetscCall(PetscViewerGetFormat(viewer,&format));
641     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
642       MatInfo info;
643       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
644       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
645       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
646       PetscCall(PetscViewerFlush(viewer));
647       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
648       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
649       PetscFunctionReturn(0);
650     } else if (format == PETSC_VIEWER_ASCII_INFO) {
651       PetscFunctionReturn(0);
652     }
653   } else if (isdraw) {
654     PetscDraw draw;
655     PetscBool isnull;
656 
657     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
658     PetscCall(PetscDrawIsNull(draw,&isnull));
659     if (isnull) PetscFunctionReturn(0);
660   }
661 
662   {
663     /* assemble the entire matrix onto the first processor. */
664     Mat         A;
665     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
666     PetscInt    *cols;
667     PetscScalar *vals;
668 
669     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
670     if (rank == 0) {
671       PetscCall(MatSetSizes(A,M,N,M,N));
672     } else {
673       PetscCall(MatSetSizes(A,0,0,M,N));
674     }
675     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
676     PetscCall(MatSetType(A,MATMPIDENSE));
677     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
678     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
679 
680     /* Copy the matrix ... This isn't the most efficient means,
681        but it's quick for now */
682     A->insertmode = INSERT_VALUES;
683 
684     row = mat->rmap->rstart;
685     m   = mdn->A->rmap->n;
686     for (i=0; i<m; i++) {
687       PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
688       PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
689       PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
690       row++;
691     }
692 
693     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
694     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
695     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
696     if (rank == 0) {
697       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
698       PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
699     }
700     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
701     PetscCall(PetscViewerFlush(viewer));
702     PetscCall(MatDestroy(&A));
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
708 {
709   PetscBool      iascii,isbinary,isdraw,issocket;
710 
711   PetscFunctionBegin;
712   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
713   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
714   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
715   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
716 
717   if (iascii || issocket || isdraw) {
718     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
719   } else if (isbinary) {
720     PetscCall(MatView_Dense_Binary(mat,viewer));
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
726 {
727   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
728   Mat            mdn  = mat->A;
729   PetscLogDouble isend[5],irecv[5];
730 
731   PetscFunctionBegin;
732   info->block_size = 1.0;
733 
734   PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));
735 
736   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
737   isend[3] = info->memory;  isend[4] = info->mallocs;
738   if (flag == MAT_LOCAL) {
739     info->nz_used      = isend[0];
740     info->nz_allocated = isend[1];
741     info->nz_unneeded  = isend[2];
742     info->memory       = isend[3];
743     info->mallocs      = isend[4];
744   } else if (flag == MAT_GLOBAL_MAX) {
745     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
746 
747     info->nz_used      = irecv[0];
748     info->nz_allocated = irecv[1];
749     info->nz_unneeded  = irecv[2];
750     info->memory       = irecv[3];
751     info->mallocs      = irecv[4];
752   } else if (flag == MAT_GLOBAL_SUM) {
753     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
754 
755     info->nz_used      = irecv[0];
756     info->nz_allocated = irecv[1];
757     info->nz_unneeded  = irecv[2];
758     info->memory       = irecv[3];
759     info->mallocs      = irecv[4];
760   }
761   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
762   info->fill_ratio_needed = 0;
763   info->factor_mallocs    = 0;
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
768 {
769   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
770 
771   PetscFunctionBegin;
772   switch (op) {
773   case MAT_NEW_NONZERO_LOCATIONS:
774   case MAT_NEW_NONZERO_LOCATION_ERR:
775   case MAT_NEW_NONZERO_ALLOCATION_ERR:
776     MatCheckPreallocated(A,1);
777     PetscCall(MatSetOption(a->A,op,flg));
778     break;
779   case MAT_ROW_ORIENTED:
780     MatCheckPreallocated(A,1);
781     a->roworiented = flg;
782     PetscCall(MatSetOption(a->A,op,flg));
783     break;
784   case MAT_FORCE_DIAGONAL_ENTRIES:
785   case MAT_KEEP_NONZERO_PATTERN:
786   case MAT_USE_HASH_TABLE:
787   case MAT_SORTED_FULL:
788     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
789     break;
790   case MAT_IGNORE_OFF_PROC_ENTRIES:
791     a->donotstash = flg;
792     break;
793   case MAT_SYMMETRIC:
794   case MAT_STRUCTURALLY_SYMMETRIC:
795   case MAT_HERMITIAN:
796   case MAT_SYMMETRY_ETERNAL:
797   case MAT_IGNORE_LOWER_TRIANGULAR:
798   case MAT_IGNORE_ZERO_ENTRIES:
799     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
800     break;
801   default:
802     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
803   }
804   PetscFunctionReturn(0);
805 }
806 
807 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
808 {
809   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
810   const PetscScalar *l;
811   PetscScalar       x,*v,*vv,*r;
812   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
813 
814   PetscFunctionBegin;
815   PetscCall(MatDenseGetArray(mdn->A,&vv));
816   PetscCall(MatDenseGetLDA(mdn->A,&lda));
817   PetscCall(MatGetLocalSize(A,&s2,&s3));
818   if (ll) {
819     PetscCall(VecGetLocalSize(ll,&s2a));
820     PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
821     PetscCall(VecGetArrayRead(ll,&l));
822     for (i=0; i<m; i++) {
823       x = l[i];
824       v = vv + i;
825       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
826     }
827     PetscCall(VecRestoreArrayRead(ll,&l));
828     PetscCall(PetscLogFlops(1.0*n*m));
829   }
830   if (rr) {
831     const PetscScalar *ar;
832 
833     PetscCall(VecGetLocalSize(rr,&s3a));
834     PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
835     PetscCall(VecGetArrayRead(rr,&ar));
836     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
837     PetscCall(VecGetArray(mdn->lvec,&r));
838     PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
839     PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
840     PetscCall(VecRestoreArrayRead(rr,&ar));
841     for (i=0; i<n; i++) {
842       x = r[i];
843       v = vv + i*lda;
844       for (j=0; j<m; j++) (*v++) *= x;
845     }
846     PetscCall(VecRestoreArray(mdn->lvec,&r));
847     PetscCall(PetscLogFlops(1.0*n*m));
848   }
849   PetscCall(MatDenseRestoreArray(mdn->A,&vv));
850   PetscFunctionReturn(0);
851 }
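
/* A hedged usage sketch for MatDiagonalScale(), assuming A is an assembled MATMPIDENSE
   matrix; the left vector scales the rows and the right vector scales the columns, so A is
   replaced by diag(ll) A diag(rr):

     Vec l,r;
     PetscCall(MatCreateVecs(A,&r,&l));
     PetscCall(VecSet(l,2.0));
     PetscCall(VecSet(r,0.5));
     PetscCall(MatDiagonalScale(A,l,r));
     PetscCall(VecDestroy(&l));
     PetscCall(VecDestroy(&r));
*/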
852 
853 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
854 {
855   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
856   PetscInt          i,j;
857   PetscMPIInt       size;
858   PetscReal         sum = 0.0;
859   const PetscScalar *av,*v;
860 
861   PetscFunctionBegin;
862   PetscCall(MatDenseGetArrayRead(mdn->A,&av));
863   v    = av;
864   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
865   if (size == 1) {
866     PetscCall(MatNorm(mdn->A,type,nrm));
867   } else {
868     if (type == NORM_FROBENIUS) {
869       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
870         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
871       }
872       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
873       *nrm = PetscSqrtReal(*nrm);
874       PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
875     } else if (type == NORM_1) {
876       PetscReal *tmp,*tmp2;
877       PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
878       *nrm = 0.0;
879       v    = av;
880       for (j=0; j<mdn->A->cmap->n; j++) {
881         for (i=0; i<mdn->A->rmap->n; i++) {
882           tmp[j] += PetscAbsScalar(*v);  v++;
883         }
884       }
885       PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
886       for (j=0; j<A->cmap->N; j++) {
887         if (tmp2[j] > *nrm) *nrm = tmp2[j];
888       }
889       PetscCall(PetscFree2(tmp,tmp2));
890       PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
891     } else if (type == NORM_INFINITY) { /* max row norm */
892       PetscReal ntemp;
893       PetscCall(MatNorm(mdn->A,type,&ntemp));
894       PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
895     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
896   }
897   PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
898   PetscFunctionReturn(0);
899 }
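
/* The Frobenius and 1-norms are formed from local contributions combined with MPIU_SUM, the
   infinity norm (max row sum) with MPIU_MAX; the matrix 2-norm is not supported.  A minimal
   usage sketch, assuming A is an assembled MATMPIDENSE matrix:

     PetscReal nrm;
     PetscCall(MatNorm(A,NORM_FROBENIUS,&nrm));
*/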
900 
901 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
902 {
903   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
904   Mat            B;
905   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
906   PetscInt       j,i,lda;
907   PetscScalar    *v;
908 
909   PetscFunctionBegin;
910   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
911     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
912     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
913     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
914     PetscCall(MatMPIDenseSetPreallocation(B,NULL));
915   } else B = *matout;
916 
917   m    = a->A->rmap->n; n = a->A->cmap->n;
918   PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
919   PetscCall(MatDenseGetLDA(a->A,&lda));
920   PetscCall(PetscMalloc1(m,&rwork));
921   for (i=0; i<m; i++) rwork[i] = rstart + i;
922   for (j=0; j<n; j++) {
923     PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
924     v   += lda;
925   }
926   PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
927   PetscCall(PetscFree(rwork));
928   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
929   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
930   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
931     *matout = B;
932   } else {
933     PetscCall(MatHeaderMerge(A,&B));
934   }
935   PetscFunctionReturn(0);
936 }
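
/* A hedged usage sketch for MatTranspose() with this implementation, assuming A is an
   assembled MATMPIDENSE matrix; the transpose is built explicitly by inserting each local
   column of A as a row of the new matrix:

     Mat At;
     PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
     PetscCall(MatDestroy(&At));
*/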
937 
938 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
939 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
940 
941 PetscErrorCode MatSetUp_MPIDense(Mat A)
942 {
943   PetscFunctionBegin;
944   PetscCall(PetscLayoutSetUp(A->rmap));
945   PetscCall(PetscLayoutSetUp(A->cmap));
946   if (!A->preallocated) {
947     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
948   }
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
953 {
954   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
955 
956   PetscFunctionBegin;
957   PetscCall(MatAXPY(A->A,alpha,B->A,str));
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatConjugate_MPIDense(Mat mat)
962 {
963   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
964 
965   PetscFunctionBegin;
966   PetscCall(MatConjugate(a->A));
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatRealPart_MPIDense(Mat A)
971 {
972   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
973 
974   PetscFunctionBegin;
975   PetscCall(MatRealPart(a->A));
976   PetscFunctionReturn(0);
977 }
978 
979 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
980 {
981   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
982 
983   PetscFunctionBegin;
984   PetscCall(MatImaginaryPart(a->A));
985   PetscFunctionReturn(0);
986 }
987 
988 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
989 {
990   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
991 
992   PetscFunctionBegin;
993   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
994   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
995   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
996   PetscFunctionReturn(0);
997 }
998 
999 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1000 
1001 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
1002 {
1003   PetscInt       i,m,n;
1004   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1005   PetscReal      *work;
1006 
1007   PetscFunctionBegin;
1008   PetscCall(MatGetSize(A,&m,&n));
1009   PetscCall(PetscMalloc1(n,&work));
1010   if (type == REDUCTION_MEAN_REALPART) {
1011     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
1012   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1013     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
1014   } else {
1015     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
1016   }
1017   if (type == NORM_2) {
1018     for (i=0; i<n; i++) work[i] *= work[i];
1019   }
1020   if (type == NORM_INFINITY) {
1021     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1022   } else {
1023     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1024   }
1025   PetscCall(PetscFree(work));
1026   if (type == NORM_2) {
1027     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1028   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1029     for (i=0; i<n; i++) reductions[i] /= m;
1030   }
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #if defined(PETSC_HAVE_CUDA)
1035 PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
1036 {
1037   PetscScalar *da;
1038   PetscInt    lda;
1039 
1040   PetscFunctionBegin;
1041   PetscCall(MatDenseCUDAGetArray(A,&da));
1042   PetscCall(MatDenseGetLDA(A,&lda));
1043   PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
1044   PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
1045   PetscCall(MatDenseCUDARestoreArray(A,&da));
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1050 {
1051   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1052   PetscInt       lda;
1053 
1054   PetscFunctionBegin;
1055   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1056   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1057   if (!a->cvec) {
1058     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1059     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1060   }
1061   a->vecinuse = col + 1;
1062   PetscCall(MatDenseGetLDA(a->A,&lda));
1063   PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1064   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1065   *v   = a->cvec;
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1070 {
1071   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1072 
1073   PetscFunctionBegin;
1074   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1075   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1076   a->vecinuse = 0;
1077   PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1078   PetscCall(VecCUDAResetArray(a->cvec));
1079   *v   = NULL;
1080   PetscFunctionReturn(0);
1081 }
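
/* The column-vector accessors repoint the array of a single shared work vector (a->cvec) at
   column col of the local block, so each get must be matched by the corresponding restore
   before another column is requested.  A hedged usage sketch, assuming A is an assembled
   MATMPIDENSECUDA (or MATMPIDENSE) matrix:

     Vec c;
     PetscCall(MatDenseGetColumnVec(A,0,&c));
     PetscCall(VecScale(c,2.0));
     PetscCall(MatDenseRestoreColumnVec(A,0,&c));
*/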
1082 
1083 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1084 {
1085   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1086   PetscInt       lda;
1087 
1088   PetscFunctionBegin;
1089   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1090   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1091   if (!a->cvec) {
1092     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1093     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1094   }
1095   a->vecinuse = col + 1;
1096   PetscCall(MatDenseGetLDA(a->A,&lda));
1097   PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
1098   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1099   PetscCall(VecLockReadPush(a->cvec));
1100   *v   = a->cvec;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1105 {
1106   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1107 
1108   PetscFunctionBegin;
1109   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1110   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1111   a->vecinuse = 0;
1112   PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
1113   PetscCall(VecLockReadPop(a->cvec));
1114   PetscCall(VecCUDAResetArray(a->cvec));
1115   *v   = NULL;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1120 {
1121   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1122   PetscInt       lda;
1123 
1124   PetscFunctionBegin;
1125   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1126   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1127   if (!a->cvec) {
1128     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1129     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1130   }
1131   a->vecinuse = col + 1;
1132   PetscCall(MatDenseGetLDA(a->A,&lda));
1133   PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1134   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1135   *v   = a->cvec;
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1140 {
1141   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1142 
1143   PetscFunctionBegin;
1144   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1145   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1146   a->vecinuse = 0;
1147   PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1148   PetscCall(VecCUDAResetArray(a->cvec));
1149   *v   = NULL;
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1154 {
1155   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1156 
1157   PetscFunctionBegin;
1158   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1159   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1160   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1165 {
1166   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1170   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1171   PetscCall(MatDenseCUDAResetArray(l->A));
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1176 {
1177   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1181   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1182   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1187 {
1188   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1189 
1190   PetscFunctionBegin;
1191   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1196 {
1197   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1205 {
1206   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1207 
1208   PetscFunctionBegin;
1209   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1214 {
1215   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1216 
1217   PetscFunctionBegin;
1218   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1223 {
1224   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1225 
1226   PetscFunctionBegin;
1227   PetscCall(MatDenseCUDAGetArray(l->A,a));
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1232 {
1233   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1234 
1235   PetscFunctionBegin;
1236   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1241 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1242 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1243 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1244 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1245 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1246 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1247 
1248 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1249 {
1250   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1251 
1252   PetscFunctionBegin;
1253   PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1254   PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1255   if (d->A) {
1256     PetscCall(MatBindToCPU(d->A,bind));
1257   }
1258   mat->boundtocpu = bind;
1259   if (!bind) {
1260     PetscBool iscuda;
1261 
1262     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
1263     if (!iscuda) {
1264       PetscCall(VecDestroy(&d->cvec));
1265     }
1266     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
1267     if (!iscuda) {
1268       PetscCall(MatDestroy(&d->cmat));
1269     }
1270     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
1271     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
1272     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
1273     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1274     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
1275     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1276     mat->ops->shift                   = MatShift_MPIDenseCUDA;
1277   } else {
1278     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1279     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1280     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1281     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1282     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1283     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1284     mat->ops->shift                   = MatShift_MPIDense;
1285   }
1286   if (d->cmat) {
1287     PetscCall(MatBindToCPU(d->cmat,bind));
1288   }
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1293 {
1294   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1295   PetscBool      iscuda;
1296 
1297   PetscFunctionBegin;
1298   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1299   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
1300   if (!iscuda) PetscFunctionReturn(0);
1301   PetscCall(PetscLayoutSetUp(A->rmap));
1302   PetscCall(PetscLayoutSetUp(A->cmap));
1303   if (!d->A) {
1304     PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
1305     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
1306     PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
1307   }
1308   PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
1309   PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
1310   A->preallocated = PETSC_TRUE;
1311   A->assembled = PETSC_TRUE;
1312   PetscFunctionReturn(0);
1313 }
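
/* A hedged usage sketch, assuming PETSc was configured with CUDA and m, n are the desired
   global sizes; passing d_data = NULL lets PETSc allocate the device storage for the local
   block, otherwise the supplied device array is used:

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n));
     PetscCall(MatSetType(A,MATMPIDENSECUDA));
     PetscCall(MatMPIDenseCUDASetPreallocation(A,NULL));
*/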
1314 #endif
1315 
1316 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1317 {
1318   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1319 
1320   PetscFunctionBegin;
1321   PetscCall(MatSetRandom(d->A,rctx));
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1326 {
1327   PetscFunctionBegin;
1328   *missing = PETSC_FALSE;
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1333 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1334 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1335 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1336 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1337 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1338 
1339 /* -------------------------------------------------------------------*/
1340 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1341                                         MatGetRow_MPIDense,
1342                                         MatRestoreRow_MPIDense,
1343                                         MatMult_MPIDense,
1344                                 /*  4*/ MatMultAdd_MPIDense,
1345                                         MatMultTranspose_MPIDense,
1346                                         MatMultTransposeAdd_MPIDense,
1347                                         NULL,
1348                                         NULL,
1349                                         NULL,
1350                                 /* 10*/ NULL,
1351                                         NULL,
1352                                         NULL,
1353                                         NULL,
1354                                         MatTranspose_MPIDense,
1355                                 /* 15*/ MatGetInfo_MPIDense,
1356                                         MatEqual_MPIDense,
1357                                         MatGetDiagonal_MPIDense,
1358                                         MatDiagonalScale_MPIDense,
1359                                         MatNorm_MPIDense,
1360                                 /* 20*/ MatAssemblyBegin_MPIDense,
1361                                         MatAssemblyEnd_MPIDense,
1362                                         MatSetOption_MPIDense,
1363                                         MatZeroEntries_MPIDense,
1364                                 /* 24*/ MatZeroRows_MPIDense,
1365                                         NULL,
1366                                         NULL,
1367                                         NULL,
1368                                         NULL,
1369                                 /* 29*/ MatSetUp_MPIDense,
1370                                         NULL,
1371                                         NULL,
1372                                         MatGetDiagonalBlock_MPIDense,
1373                                         NULL,
1374                                 /* 34*/ MatDuplicate_MPIDense,
1375                                         NULL,
1376                                         NULL,
1377                                         NULL,
1378                                         NULL,
1379                                 /* 39*/ MatAXPY_MPIDense,
1380                                         MatCreateSubMatrices_MPIDense,
1381                                         NULL,
1382                                         MatGetValues_MPIDense,
1383                                         MatCopy_MPIDense,
1384                                 /* 44*/ NULL,
1385                                         MatScale_MPIDense,
1386                                         MatShift_MPIDense,
1387                                         NULL,
1388                                         NULL,
1389                                 /* 49*/ MatSetRandom_MPIDense,
1390                                         NULL,
1391                                         NULL,
1392                                         NULL,
1393                                         NULL,
1394                                 /* 54*/ NULL,
1395                                         NULL,
1396                                         NULL,
1397                                         NULL,
1398                                         NULL,
1399                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1400                                         MatDestroy_MPIDense,
1401                                         MatView_MPIDense,
1402                                         NULL,
1403                                         NULL,
1404                                 /* 64*/ NULL,
1405                                         NULL,
1406                                         NULL,
1407                                         NULL,
1408                                         NULL,
1409                                 /* 69*/ NULL,
1410                                         NULL,
1411                                         NULL,
1412                                         NULL,
1413                                         NULL,
1414                                 /* 74*/ NULL,
1415                                         NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         NULL,
1419                                 /* 79*/ NULL,
1420                                         NULL,
1421                                         NULL,
1422                                         NULL,
1423                                 /* 83*/ MatLoad_MPIDense,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                 /* 89*/ NULL,
1430                                         NULL,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                 /* 94*/ NULL,
1435                                         NULL,
1436                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1437                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1438                                         NULL,
1439                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1440                                         NULL,
1441                                         NULL,
1442                                         MatConjugate_MPIDense,
1443                                         NULL,
1444                                 /*104*/ NULL,
1445                                         MatRealPart_MPIDense,
1446                                         MatImaginaryPart_MPIDense,
1447                                         NULL,
1448                                         NULL,
1449                                 /*109*/ NULL,
1450                                         NULL,
1451                                         NULL,
1452                                         MatGetColumnVector_MPIDense,
1453                                         MatMissingDiagonal_MPIDense,
1454                                 /*114*/ NULL,
1455                                         NULL,
1456                                         NULL,
1457                                         NULL,
1458                                         NULL,
1459                                 /*119*/ NULL,
1460                                         NULL,
1461                                         NULL,
1462                                         NULL,
1463                                         NULL,
1464                                 /*124*/ NULL,
1465                                         MatGetColumnReductions_MPIDense,
1466                                         NULL,
1467                                         NULL,
1468                                         NULL,
1469                                 /*129*/ NULL,
1470                                         NULL,
1471                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1472                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1473                                         NULL,
1474                                 /*134*/ NULL,
1475                                         NULL,
1476                                         NULL,
1477                                         NULL,
1478                                         NULL,
1479                                 /*139*/ NULL,
1480                                         NULL,
1481                                         NULL,
1482                                         NULL,
1483                                         NULL,
1484                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1485                                 /*145*/ NULL,
1486                                         NULL,
1487                                         NULL
1488 };
1489 
1490 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1491 {
1492   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1493   PetscBool      iscuda;
1494 
1495   PetscFunctionBegin;
1496   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1497   PetscCall(PetscLayoutSetUp(mat->rmap));
1498   PetscCall(PetscLayoutSetUp(mat->cmap));
1499   if (!a->A) {
1500     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1501     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1502     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1503   }
1504   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1505   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1506   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1507   mat->preallocated = PETSC_TRUE;
1508   mat->assembled = PETSC_TRUE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1513 {
1514   Mat            B,C;
1515 
1516   PetscFunctionBegin;
1517   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1518   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1519   PetscCall(MatDestroy(&C));
1520   if (reuse == MAT_REUSE_MATRIX) {
1521     C = *newmat;
1522   } else C = NULL;
1523   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1524   PetscCall(MatDestroy(&B));
1525   if (reuse == MAT_INPLACE_MATRIX) {
1526     PetscCall(MatHeaderReplace(A,&C));
1527   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1532 {
1533   Mat            B,C;
1534 
1535   PetscFunctionBegin;
1536   PetscCall(MatDenseGetLocalMatrix(A,&C));
1537   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1538   if (reuse == MAT_REUSE_MATRIX) {
1539     C = *newmat;
1540   } else C = NULL;
1541   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1542   PetscCall(MatDestroy(&B));
1543   if (reuse == MAT_INPLACE_MATRIX) {
1544     PetscCall(MatHeaderReplace(A,&C));
1545   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #if defined(PETSC_HAVE_ELEMENTAL)
1550 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1551 {
1552   Mat            mat_elemental;
1553   PetscScalar    *v;
1554   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1555 
1556   PetscFunctionBegin;
1557   if (reuse == MAT_REUSE_MATRIX) {
1558     mat_elemental = *newmat;
1559     PetscCall(MatZeroEntries(*newmat));
1560   } else {
1561     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1562     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1563     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1564     PetscCall(MatSetUp(mat_elemental));
1565     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1566   }
1567 
1568   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1569   for (i=0; i<N; i++) cols[i] = i;
1570   for (i=0; i<m; i++) rows[i] = rstart + i;
1571 
1572   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1573   PetscCall(MatDenseGetArray(A,&v));
1574   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1575   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1576   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1577   PetscCall(MatDenseRestoreArray(A,&v));
1578   PetscCall(PetscFree2(rows,cols));
1579 
1580   if (reuse == MAT_INPLACE_MATRIX) {
1581     PetscCall(MatHeaderReplace(A,&mat_elemental));
1582   } else {
1583     *newmat = mat_elemental;
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 #endif
1588 
1589 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1590 {
1591   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1592 
1593   PetscFunctionBegin;
1594   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1599 {
1600   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1601 
1602   PetscFunctionBegin;
1603   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1608 {
1609   Mat_MPIDense   *mat;
1610   PetscInt       m,nloc,N;
1611 
1612   PetscFunctionBegin;
1613   PetscCall(MatGetSize(inmat,&m,&N));
1614   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1615   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1616     PetscInt sum;
1617 
1618     if (n == PETSC_DECIDE) {
1619       PetscCall(PetscSplitOwnership(comm,&n,&N));
1620     }
1621     /* Check sum(n) = N */
1622     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1623     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1624 
1625     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1626     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1627   }
1628 
1629   /* numeric phase */
1630   mat = (Mat_MPIDense*)(*outmat)->data;
1631   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #if defined(PETSC_HAVE_CUDA)
1636 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1637 {
1638   Mat            B;
1639   Mat_MPIDense   *m;
1640 
1641   PetscFunctionBegin;
1642   if (reuse == MAT_INITIAL_MATRIX) {
1643     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1644   } else if (reuse == MAT_REUSE_MATRIX) {
1645     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1646   }
1647 
1648   B    = *newmat;
1649   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1650   PetscCall(PetscFree(B->defaultvectype));
1651   PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1652   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1653   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1654   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1655   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1656   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1657   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1658   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1659   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1660   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1661   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1662   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1663   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1664   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1666   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1667   m    = (Mat_MPIDense*)(B)->data;
1668   if (m->A) {
1669     PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1670   }
1671   B->ops->bindtocpu = NULL;
1672   B->offloadmask    = PETSC_OFFLOAD_CPU;
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1677 {
1678   Mat            B;
1679   Mat_MPIDense   *m;
1680 
1681   PetscFunctionBegin;
1682   if (reuse == MAT_INITIAL_MATRIX) {
1683     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1684   } else if (reuse == MAT_REUSE_MATRIX) {
1685     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1686   }
1687 
1688   B    = *newmat;
1689   PetscCall(PetscFree(B->defaultvectype));
1690   PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1691   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1692   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1693   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1694   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1695   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1696   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1697   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1698   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1699   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1700   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1701   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1702   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1703   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1704   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1705   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1706   m    = (Mat_MPIDense*)(B->data);
1707   if (m->A) {
1708     PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1709     B->offloadmask = PETSC_OFFLOAD_BOTH;
1710   } else {
1711     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1712   }
1713   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1714 
1715   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1716   PetscFunctionReturn(0);
1717 }
1718 #endif
1719 
1720 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1721 {
1722   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1723   PetscInt       lda;
1724 
1725   PetscFunctionBegin;
1726   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1727   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
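  /* Lazily create a cached MPI vector with a NULL array; VecPlaceArray() below points it at the requested column of the local dense storage */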
1728   if (!a->cvec) {
1729     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1730   }
1731   a->vecinuse = col + 1;
1732   PetscCall(MatDenseGetLDA(a->A,&lda));
1733   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1734   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1735   *v   = a->cvec;
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1740 {
1741   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1742 
1743   PetscFunctionBegin;
1744   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1745   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1746   a->vecinuse = 0;
1747   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1748   PetscCall(VecResetArray(a->cvec));
1749   *v   = NULL;
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1754 {
1755   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1756   PetscInt       lda;
1757 
1758   PetscFunctionBegin;
1759   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1760   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1761   if (!a->cvec) {
1762     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1763   }
1764   a->vecinuse = col + 1;
1765   PetscCall(MatDenseGetLDA(a->A,&lda));
1766   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1767   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1768   PetscCall(VecLockReadPush(a->cvec));
1769   *v   = a->cvec;
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1774 {
1775   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1776 
1777   PetscFunctionBegin;
1778   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
1779   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1780   a->vecinuse = 0;
1781   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1782   PetscCall(VecLockReadPop(a->cvec));
1783   PetscCall(VecResetArray(a->cvec));
1784   *v   = NULL;
1785   PetscFunctionReturn(0);
1786 }
1787 
1788 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1789 {
1790   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1791   PetscInt       lda;
1792 
1793   PetscFunctionBegin;
1794   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1795   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1796   if (!a->cvec) {
1797     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1798   }
1799   a->vecinuse = col + 1;
1800   PetscCall(MatDenseGetLDA(a->A,&lda));
1801   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1802   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1803   *v   = a->cvec;
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1808 {
1809   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1810 
1811   PetscFunctionBegin;
1812   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
1813   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1814   a->vecinuse = 0;
1815   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1816   PetscCall(VecResetArray(a->cvec));
1817   *v   = NULL;
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1822 {
1823   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1824   Mat_MPIDense *c;
1825   MPI_Comm     comm;
1826 
1827   PetscFunctionBegin;
1828   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1829   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1830   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
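  /* Lazily create and cache the parallel wrapper matrix used to expose column ranges; only its column layout is rebuilt when the requested width changes */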
1831   if (!a->cmat) {
1832     PetscCall(MatCreate(comm,&a->cmat));
1833     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1834     PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
1835     PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1836     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1837     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1838   } else if (cend-cbegin != a->cmat->cmap->N) {
1839     PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1840     PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
1841     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1842     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1843   }
1844   c = (Mat_MPIDense*)a->cmat->data;
1845   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1846   PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
1847   a->cmat->preallocated = PETSC_TRUE;
1848   a->cmat->assembled = PETSC_TRUE;
1849   a->matinuse = cbegin + 1;
1850   *v = a->cmat;
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1855 {
1856   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1857   Mat_MPIDense *c;
1858 
1859   PetscFunctionBegin;
1860   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1861   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1862   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1863   a->matinuse = 0;
1864   c = (Mat_MPIDense*)a->cmat->data;
1865   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1866   *v = NULL;
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 /*MC
1871    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1872 
1873    Options Database Keys:
1874 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1875 
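   A minimal creation sketch (comm and the sizes m, n, M, N are illustrative placeholders):
.vb
   Mat A;
   PetscCall(MatCreate(comm,&A));
   PetscCall(MatSetSizes(A,m,n,M,N));
   PetscCall(MatSetType(A,MATMPIDENSE));
   PetscCall(MatMPIDenseSetPreallocation(A,NULL));
.ve
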
1876   Level: beginner
1877 
1878 .seealso: `MatCreateDense()`
1879 
1880 M*/
1881 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1882 {
1883   Mat_MPIDense   *a;
1884 
1885   PetscFunctionBegin;
1886   PetscCall(PetscNewLog(mat,&a));
1887   mat->data = (void*)a;
1888   PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1889 
1890   mat->insertmode = NOT_SET_VALUES;
1891 
1892   /* build cache for off array entries formed */
1893   a->donotstash = PETSC_FALSE;
1894 
1895   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1896 
1897   /* stuff used for matrix vector multiply */
1898   a->lvec        = NULL;
1899   a->Mvctx       = NULL;
1900   a->roworiented = PETSC_TRUE;
1901 
1902   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1903   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1904   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1905   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1906   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1907   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1908   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1909   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1910   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1911   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1912   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1913   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1914   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1915   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1916   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1917   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1918   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1919   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1920   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1921   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1922   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1923 #if defined(PETSC_HAVE_ELEMENTAL)
1924   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1925 #endif
1926 #if defined(PETSC_HAVE_SCALAPACK)
1927   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1928 #endif
1929 #if defined(PETSC_HAVE_CUDA)
1930   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1931 #endif
1932   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1933   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1934   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1935 #if defined(PETSC_HAVE_CUDA)
1936   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1937   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1938 #endif
1939 
1940   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1941   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1942   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 /*MC
1947    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1948 
1949    Options Database Keys:
1950 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1951 
1952   Level: beginner
1953 
1954 .seealso: `MATDENSECUDA`, `MATMPIDENSE`, `MATSEQDENSECUDA`, `MatCreateDenseCUDA()`
1955 
1956 M*/
1957 #if defined(PETSC_HAVE_CUDA)
1958 #include <petsc/private/deviceimpl.h>
1959 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1960 {
1961   PetscFunctionBegin;
1962   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
1963   PetscCall(MatCreate_MPIDense(B));
1964   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
1965   PetscFunctionReturn(0);
1966 }
1967 #endif
1968 
1969 /*MC
1970    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1971 
1972    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1973    and MATMPIDENSE otherwise.
1974 
1975    Options Database Keys:
1976 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1977 
1978   Level: beginner
1979 
1980 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1981 M*/
1982 
1983 /*MC
1984    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1985 
1986    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1987    and MATMPIDENSECUDA otherwise.
1988 
1989    Options Database Keys:
1990 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1991 
1992   Level: beginner
1993 
1994 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1995 M*/
1996 
1997 /*@C
1998    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1999 
2000    Collective
2001 
2002    Input Parameters:
2003 +  B - the matrix
2004 -  data - optional location of matrix data.  Set data=NULL for PETSc
2005    to control all matrix memory allocation.
2006 
2007    Notes:
2008    The dense format is fully compatible with standard Fortran 77
2009    storage by columns.
2010 
2011    The data input variable is intended primarily for Fortran programmers
2012    who wish to allocate their own matrix memory space.  Most users should
2013    set data=NULL.
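
   A usage sketch in which the caller supplies the storage (comm, m, and N are illustrative; data must hold the local m-by-N block in column-major order and is assumed to outlive the matrix):
.vb
   Mat         B;
   PetscScalar *data;
   PetscCall(PetscMalloc1(m*N,&data));
   PetscCall(MatCreate(comm,&B));
   PetscCall(MatSetSizes(B,m,PETSC_DECIDE,PETSC_DECIDE,N));
   PetscCall(MatSetType(B,MATMPIDENSE));
   PetscCall(MatMPIDenseSetPreallocation(B,data));
.ve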
2014 
2015    Level: intermediate
2016 
2017 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2018 @*/
2019 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2020 {
2021   PetscFunctionBegin;
2022   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2023   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 /*@
2028    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2029    array provided by the user. This is useful to avoid copying an array
2030    into a matrix.
2031 
2032    Not Collective
2033 
2034    Input Parameters:
2035 +  mat - the matrix
2036 -  array - the array in column major order
2037 
2038    Notes:
2039    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2040    freed when the matrix is destroyed.
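
   A minimal sketch of the intended pairing (myarray is an illustrative, user-managed array of the proper local size in column-major order):
.vb
   PetscCall(MatDensePlaceArray(mat,myarray));
   ... use mat ...
   PetscCall(MatDenseResetArray(mat));
.ve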
2041 
2042    Level: developer
2043 
2044 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2045 
2046 @*/
2047 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2048 {
2049   PetscFunctionBegin;
2050   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2051   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2052   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2053 #if defined(PETSC_HAVE_CUDA)
2054   mat->offloadmask = PETSC_OFFLOAD_CPU;
2055 #endif
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 /*@
2060    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
2061 
2062    Not Collective
2063 
2064    Input Parameter:
2065 .  mat - the matrix
2066 
2067    Notes:
2068    You can only call this after a call to MatDensePlaceArray()
2069 
2070    Level: developer
2071 
2072 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2073 
2074 @*/
2075 PetscErrorCode  MatDenseResetArray(Mat mat)
2076 {
2077   PetscFunctionBegin;
2078   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2079   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
2080   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 /*@
2085    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2086    array provided by the user. This is useful to avoid copying an array
2087    into a matrix.
2088 
2089    Not Collective
2090 
2091    Input Parameters:
2092 +  mat - the matrix
2093 -  array - the array in column major order
2094 
2095    Notes:
2096    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2097    freed by the user. It will be freed when the matrix is destroyed.
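
   A minimal sketch (nloc and N are illustrative local row and global column counts; newarray is filled by the caller in column-major order):
.vb
   PetscScalar *newarray;
   PetscCall(PetscMalloc1(nloc*N,&newarray));
   ... fill newarray ...
   PetscCall(MatDenseReplaceArray(mat,newarray));
.ve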
2098 
2099    Level: developer
2100 
2101 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2102 @*/
2103 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2104 {
2105   PetscFunctionBegin;
2106   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2107   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2108   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2109 #if defined(PETSC_HAVE_CUDA)
2110   mat->offloadmask = PETSC_OFFLOAD_CPU;
2111 #endif
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 #if defined(PETSC_HAVE_CUDA)
2116 /*@C
2117    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2118    array provided by the user. This is useful to avoid copying an array
2119    into a matrix.
2120 
2121    Not Collective
2122 
2123    Input Parameters:
2124 +  mat - the matrix
2125 -  array - the array in column major order
2126 
2127    Notes:
2128    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2129    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
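
   A minimal sketch, with CUDA error checking omitted (nloc and N are illustrative sizes for the local block; d_array is user-managed device memory):
.vb
   PetscScalar *d_array;
   cudaMalloc((void**)&d_array,nloc*N*sizeof(PetscScalar));
   PetscCall(MatDenseCUDAPlaceArray(mat,d_array));
   ... use mat ...
   PetscCall(MatDenseCUDAResetArray(mat));
.ve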
2130 
2131    Level: developer
2132 
2133 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2134 @*/
2135 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2136 {
2137   PetscFunctionBegin;
2138   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2139   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2140   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2141   mat->offloadmask = PETSC_OFFLOAD_GPU;
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 /*@C
2146    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2147 
2148    Not Collective
2149 
2150    Input Parameter:
2151 .  mat - the matrix
2152 
2153    Notes:
2154    You can only call this after a call to MatDenseCUDAPlaceArray()
2155 
2156    Level: developer
2157 
2158 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2159 
2160 @*/
2161 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2162 {
2163   PetscFunctionBegin;
2164   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2165   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2166   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /*@C
2171    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2172    array provided by the user. This is useful to avoid copying an array
2173    into a matrix.
2174 
2175    Not Collective
2176 
2177    Input Parameters:
2178 +  mat - the matrix
2179 -  array - the array in column major order
2180 
2181    Notes:
2182    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2183    The memory passed in CANNOT be freed by the user. It will be freed
2184    when the matrix is destroyed. The array should respect the matrix leading dimension.
2185 
2186    Level: developer
2187 
2188 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2189 @*/
2190 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2191 {
2192   PetscFunctionBegin;
2193   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2194   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2195   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2196   mat->offloadmask = PETSC_OFFLOAD_GPU;
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 /*@C
2201    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2202 
2203    Not Collective
2204 
2205    Input Parameter:
2206 .  A - the matrix
2207 
2208    Output Parameter:
2209 .  array - the GPU array in column major order
2210 
2211    Notes:
2212    Because this routine provides write-only access, the GPU buffer is not synchronized with changes previously made on the CPU. If you need up-to-date data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2213 
2214    Level: developer
2215 
2216 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2217 @*/
2218 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2219 {
2220   PetscFunctionBegin;
2221   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2222   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2223   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 /*@C
2228    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2229 
2230    Not Collective
2231 
2232    Input Parameters:
2233 +  A - the matrix
2234 -  array - the GPU array in column major order
2235 
2236    Notes:
2237 
2238    Level: developer
2239 
2240 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2241 @*/
2242 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2243 {
2244   PetscFunctionBegin;
2245   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2246   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2247   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2248   A->offloadmask = PETSC_OFFLOAD_GPU;
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 /*@C
2253    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2254 
2255    Not Collective
2256    Input Parameter:
2257    Input Parameters:
2258 .  A - the matrix
2259    Output Parameter:
2260    Output Parameters
2261 .  array - the GPU array in column major order
2262 
2263    Notes:
2264    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2265 
2266    Level: developer
2267 
2268 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2269 @*/
2270 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2271 {
2272   PetscFunctionBegin;
2273   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2274   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 /*@C
2279    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2280 
2281    Not Collective
2282 
2283    Input Parameters:
2284 +  A - the matrix
2285 -  array - the GPU array in column major order
2286 
2287    Notes:
2288    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2289 
2290    Level: developer
2291 
2292 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2293 @*/
2294 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2295 {
2296   PetscFunctionBegin;
2297   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 /*@C
2302    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2303 
2304    Not Collective
2305 
2306    Input Parameter:
2307 .  A - the matrix
2308 
2309    Output Parameter:
2310 .  array - the GPU array in column major order
2311 
2312    Notes:
2313    Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
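
   A minimal access sketch (the kernel launch is illustrative only):
.vb
   PetscScalar *d_v;
   PetscCall(MatDenseCUDAGetArray(A,&d_v));
   ... launch a user kernel that reads and writes d_v ...
   PetscCall(MatDenseCUDARestoreArray(A,&d_v));
.ve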
2314 
2315    Level: developer
2316 
2317 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2318 @*/
2319 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2320 {
2321   PetscFunctionBegin;
2322   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2323   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2324   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 /*@C
2329    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2330 
2331    Not Collective
2332 
2333    Input Parameters:
2334 +  A - the matrix
2335 -  array - the GPU array in column major order
2336 
2337    Notes:
2338 
2339    Level: developer
2340 
2341 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2342 @*/
2343 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2344 {
2345   PetscFunctionBegin;
2346   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2347   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2348   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2349   A->offloadmask = PETSC_OFFLOAD_GPU;
2350   PetscFunctionReturn(0);
2351 }
2352 #endif
2353 
2354 /*@C
2355    MatCreateDense - Creates a matrix in dense format.
2356 
2357    Collective
2358 
2359    Input Parameters:
2360 +  comm - MPI communicator
2361 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2362 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2363 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2364 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2365 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2366    to control all matrix memory allocation.
2367 
2368    Output Parameter:
2369 .  A - the matrix
2370 
2371    Notes:
2372    The dense format is fully compatible with standard Fortran 77
2373    storage by columns.
2374    Note that, although local portions of the matrix are stored in column-major
2375    order, the matrix is partitioned across MPI ranks by row.
2376 
2377    The data input variable is intended primarily for Fortran programmers
2378    who wish to allocate their own matrix memory space.  Most users should
2379    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2380 
2381    The user MUST specify either the local or global matrix dimensions
2382    (possibly both).
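
   A minimal sketch (the global sizes M and N are illustrative; PETSc chooses the row and column splits):
.vb
   Mat A;
   PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
   ... set values with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
   PetscCall(MatDestroy(&A));
.ve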
2383 
2384    Level: intermediate
2385 
2386 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2387 @*/
2388 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2389 {
2390   PetscFunctionBegin;
2391   PetscCall(MatCreate(comm,A));
2392   PetscCall(MatSetSizes(*A,m,n,M,N));
2393   PetscCall(MatSetType(*A,MATDENSE));
2394   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2395   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #if defined(PETSC_HAVE_CUDA)
2400 /*@C
2401    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2402 
2403    Collective
2404 
2405    Input Parameters:
2406 +  comm - MPI communicator
2407 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2408 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2409 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2410 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2411 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2412    to control matrix memory allocation.
2413 
2414    Output Parameter:
2415 .  A - the matrix
2416 
2417    Notes:
2418 
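   A minimal sketch (the global sizes M and N are illustrative; passing NULL lets PETSc allocate the GPU storage):
.vb
   Mat A;
   PetscCall(MatCreateDenseCUDA(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
   ... use A, for example through MatDenseCUDAGetArray()/MatDenseCUDARestoreArray() ...
   PetscCall(MatDestroy(&A));
.ve
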
2419    Level: intermediate
2420 
2421 .seealso: `MatCreate()`, `MatCreateDense()`
2422 @*/
2423 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2424 {
2425   PetscFunctionBegin;
2426   PetscCall(MatCreate(comm,A));
2427   PetscValidLogicalCollectiveBool(*A,!!data,6);
2428   PetscCall(MatSetSizes(*A,m,n,M,N));
2429   PetscCall(MatSetType(*A,MATDENSECUDA));
2430   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2431   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2432   PetscFunctionReturn(0);
2433 }
2434 #endif
2435 
2436 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2437 {
2438   Mat            mat;
2439   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2440 
2441   PetscFunctionBegin;
2442   *newmat = NULL;
2443   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2444   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2445   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2446   a       = (Mat_MPIDense*)mat->data;
2447 
2448   mat->factortype   = A->factortype;
2449   mat->assembled    = PETSC_TRUE;
2450   mat->preallocated = PETSC_TRUE;
2451 
2452   mat->insertmode = NOT_SET_VALUES;
2453   a->donotstash   = oldmat->donotstash;
2454 
2455   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2456   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2457 
2458   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2459   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2460 
2461   *newmat = mat;
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2466 {
2467   PetscBool      isbinary;
2468 #if defined(PETSC_HAVE_HDF5)
2469   PetscBool      ishdf5;
2470 #endif
2471 
2472   PetscFunctionBegin;
2473   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2474   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2475   /* force binary viewer to load .info file if it has not yet done so */
2476   PetscCall(PetscViewerSetUp(viewer));
2477   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2478 #if defined(PETSC_HAVE_HDF5)
2479   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2480 #endif
2481   if (isbinary) {
2482     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2483 #if defined(PETSC_HAVE_HDF5)
2484   } else if (ishdf5) {
2485     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2486 #endif
2487   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2492 {
2493   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2494   Mat            a,b;
2495 
2496   PetscFunctionBegin;
2497   a    = matA->A;
2498   b    = matB->A;
2499   PetscCall(MatEqual(a,b,flag));
2500   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2505 {
2506   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2507 
2508   PetscFunctionBegin;
2509   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2510   PetscCall(MatDestroy(&atb->atb));
2511   PetscCall(PetscFree(atb));
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2516 {
2517   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2518 
2519   PetscFunctionBegin;
2520   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2521   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2522   PetscCall(PetscFree(abt));
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2527 {
2528   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2529   Mat_TransMatMultDense *atb;
2530   MPI_Comm              comm;
2531   PetscMPIInt           size,*recvcounts;
2532   PetscScalar           *carray,*sendbuf;
2533   const PetscScalar     *atbarray;
2534   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2535   const PetscInt        *ranges;
2536 
2537   PetscFunctionBegin;
2538   MatCheckProduct(C,3);
2539   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2540   atb = (Mat_TransMatMultDense *)C->product->data;
2541   recvcounts = atb->recvcounts;
2542   sendbuf = atb->sendbuf;
2543 
2544   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2545   PetscCallMPI(MPI_Comm_size(comm,&size));
2546 
2547   /* compute atbarray = aseq^T * bseq */
2548   PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2549 
2550   PetscCall(MatGetOwnershipRanges(C,&ranges));
2551 
2552   /* arrange atbarray into sendbuf */
2553   PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
2554   PetscCall(MatDenseGetLDA(atb->atb,&lda));
2555   for (proc=0, k=0; proc<size; proc++) {
2556     for (j=0; j<cN; j++) {
2557       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2558     }
2559   }
2560   PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2561 
2562   /* sum all atbarray to local values of C */
2563   PetscCall(MatDenseGetArrayWrite(c->A,&carray));
2564   PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2565   PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
2566   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2567   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2572 {
2573   MPI_Comm              comm;
2574   PetscMPIInt           size;
2575   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2576   Mat_TransMatMultDense *atb;
2577   PetscBool             cisdense;
2578   PetscInt              i;
2579   const PetscInt        *ranges;
2580 
2581   PetscFunctionBegin;
2582   MatCheckProduct(C,3);
2583   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2584   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2585   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2586     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2587   }
2588 
2589   /* create matrix product C */
2590   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2591   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2592   if (!cisdense) {
2593     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2594   }
2595   PetscCall(MatSetUp(C));
2596 
2597   /* create data structure for reuse C */
2598   PetscCallMPI(MPI_Comm_size(comm,&size));
2599   PetscCall(PetscNew(&atb));
2600   cM   = C->rmap->N;
2601   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2602   PetscCall(MatGetOwnershipRanges(C,&ranges));
2603   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2604 
2605   C->product->data    = atb;
2606   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2607   PetscFunctionReturn(0);
2608 }
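
/*
   Worked example (illustrative numbers): for C of global size 10 x 4 on 3 ranks owning
   4, 3 and 3 rows (ranges = {0,4,7,10}), the loop above gives recvcounts = {16,12,12}.
   The numeric routine packs sendbuf in the matching order -- for each destination rank,
   the cN columns of that rank's row block -- so a single MPI_Reduce_scatter both sums the
   per-rank contributions and delivers to each rank exactly its own rows of C.
*/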
2609 
2610 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2611 {
2612   MPI_Comm              comm;
2613   PetscMPIInt           i, size;
2614   PetscInt              maxRows, bufsiz;
2615   PetscMPIInt           tag;
2616   PetscInt              alg;
2617   Mat_MatTransMultDense *abt;
2618   Mat_Product           *product = C->product;
2619   PetscBool             flg;
2620 
2621   PetscFunctionBegin;
2622   MatCheckProduct(C,4);
2623   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2624   /* check local size of A and B */
2625   PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);
2626 
2627   PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
2628   alg  = flg ? 0 : 1;
2629 
2630   /* setup matrix product C */
2631   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
2632   PetscCall(MatSetType(C,MATMPIDENSE));
2633   PetscCall(MatSetUp(C));
2634   PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));
2635 
2636   /* create data structure for reuse of C */
2637   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2638   PetscCallMPI(MPI_Comm_size(comm,&size));
2639   PetscCall(PetscNew(&abt));
2640   abt->tag = tag;
2641   abt->alg = alg;
2642   switch (alg) {
2643   case 1: /* alg: "cyclic" */
2644     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2645     bufsiz = A->cmap->N * maxRows;
2646     PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
2647     break;
2648   default: /* alg: "allgatherv" */
2649     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2650     PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
2651     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2652     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2653     break;
2654   }
2655 
2656   C->product->data    = abt;
2657   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2658   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2659   PetscFunctionReturn(0);
2660 }
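
/*
   Worked example (illustrative numbers): with A of global size 12 x 5, B of global size
   9 x 5, and 3 ranks each owning 3 rows of B (range = {0,3,6,9}), the setup above yields
     "cyclic":     maxRows = 3, so buf[0] and buf[1] each hold bufsiz = 5*3 = 15 scalars;
     "allgatherv": buf[0] holds 3*5 = 15 scalars (the transposed local block of B),
                   buf[1] holds 9*5 = 45 scalars (all of B^T),
                   recvdispls = {0,15,30,45} and recvcounts = {15,15,15}.
*/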
2661 
2662 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2663 {
2664   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2665   Mat_MatTransMultDense *abt;
2666   MPI_Comm              comm;
2667   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2668   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2669   PetscInt              i,cK=A->cmap->N,k,j,bn;
2670   PetscScalar           _DOne=1.0,_DZero=0.0;
2671   const PetscScalar     *av,*bv;
2672   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2673   MPI_Request           reqs[2];
2674   const PetscInt        *ranges;
2675 
2676   PetscFunctionBegin;
2677   MatCheckProduct(C,3);
2678   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2679   abt  = (Mat_MatTransMultDense*)C->product->data;
2680   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2681   PetscCallMPI(MPI_Comm_rank(comm,&rank));
2682   PetscCallMPI(MPI_Comm_size(comm,&size));
2683   PetscCall(MatDenseGetArrayRead(a->A,&av));
2684   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2685   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2686   PetscCall(MatDenseGetLDA(a->A,&i));
2687   PetscCall(PetscBLASIntCast(i,&alda));
2688   PetscCall(MatDenseGetLDA(b->A,&i));
2689   PetscCall(PetscBLASIntCast(i,&blda));
2690   PetscCall(MatDenseGetLDA(c->A,&i));
2691   PetscCall(PetscBLASIntCast(i,&clda));
2692   PetscCall(MatGetOwnershipRanges(B,&ranges));
2693   bn   = B->rmap->n;
2694   if (blda == bn) {
2695     sendbuf = (PetscScalar*)bv;
2696   } else {
2697     sendbuf = abt->buf[0];
2698     for (k = 0, i = 0; i < cK; i++) {
2699       for (j = 0; j < bn; j++, k++) {
2700         sendbuf[k] = bv[i * blda + j];
2701       }
2702     }
2703   }
2704   if (size > 1) {
2705     sendto = (rank + size - 1) % size;
2706     recvfrom = (rank + size + 1) % size;
2707   } else {
2708     sendto = recvfrom = 0;
2709   }
2710   PetscCall(PetscBLASIntCast(cK,&ck));
2711   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2712   recvisfrom = rank;
2713   for (i = 0; i < size; i++) {
2714     /* the previous receive and send have completed, so both buffers can be read/modified */
2715     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process from which the next recvbuf originates */
2716     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2717 
2718     if (nextrecvisfrom != rank) {
2719       /* start the cyclic transfer: send sendbuf onwards, receive the next block into recvbuf (which becomes the next sendbuf) */
2720       sendsiz = cK * bn;
2721       recvsiz = cK * nextbn;
2722       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2723       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2724       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2725     }
2726 
2727     /* local aseq * sendbuf^T */
2728     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2729     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2730 
2731     if (nextrecvisfrom != rank) {
2732       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2733       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2734     }
2735     bn = nextbn;
2736     recvisfrom = nextrecvisfrom;
2737     sendbuf = recvbuf;
2738   }
2739   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2740   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2741   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2742   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2743   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2744   PetscFunctionReturn(0);
2745 }
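
/*
   Communication-pattern sketch (illustrative, not library code): the routine above passes each
   rank's compacted local block of B around a ring, overlapping the Isend/Irecv of the next block
   with the local GEMM on the current one.  The stand-alone program below shows only that ring
   pattern; the buffer length of 4 and the MPI_DOUBLE payload are assumptions for the example.

      #include <mpi.h>
      #include <stdio.h>
      int main(int argc,char **argv)
      {
        int         rank,size,i,sendto,recvfrom;
        double      bufs[2][4],*sendbuf,*recvbuf = NULL;
        MPI_Request reqs[2];

        MPI_Init(&argc,&argv);
        MPI_Comm_rank(MPI_COMM_WORLD,&rank);
        MPI_Comm_size(MPI_COMM_WORLD,&size);
        sendto   = (rank + size - 1) % size;               // blocks travel towards lower ranks
        recvfrom = (rank + 1) % size;
        for (i = 0; i < 4; i++) bufs[0][i] = (double)rank;  // start with this rank's own block
        sendbuf = bufs[0];
        for (i = 0; i < size; i++) {
          int owner = (rank + i) % size;                   // rank whose block sendbuf currently holds
          if (i < size - 1) {                              // post the next hop before using this block
            recvbuf = (i & 1) ? bufs[0] : bufs[1];
            MPI_Isend(sendbuf,4,MPI_DOUBLE,sendto,0,MPI_COMM_WORLD,&reqs[0]);
            MPI_Irecv(recvbuf,4,MPI_DOUBLE,recvfrom,0,MPI_COMM_WORLD,&reqs[1]);
          }
          printf("[%d] step %d works on the block of rank %d\n",rank,i,owner);  // the GEMM in the routine above
          if (i < size - 1) {
            MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);       // then swap the roles of the two buffers
            sendbuf = recvbuf;
          }
        }
        MPI_Finalize();
        return 0;
      }
*/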
2746 
2747 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2748 {
2749   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2750   Mat_MatTransMultDense *abt;
2751   MPI_Comm              comm;
2752   PetscMPIInt           size;
2753   PetscScalar           *cv, *sendbuf, *recvbuf;
2754   const PetscScalar     *av,*bv;
2755   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2756   PetscScalar           _DOne=1.0,_DZero=0.0;
2757   PetscBLASInt          cm, cn, ck, alda, clda;
2758 
2759   PetscFunctionBegin;
2760   MatCheckProduct(C,3);
2761   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2762   abt  = (Mat_MatTransMultDense*)C->product->data;
2763   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2764   PetscCallMPI(MPI_Comm_size(comm,&size));
2765   PetscCall(MatDenseGetArrayRead(a->A,&av));
2766   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2767   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2768   PetscCall(MatDenseGetLDA(a->A,&i));
2769   PetscCall(PetscBLASIntCast(i,&alda));
2770   PetscCall(MatDenseGetLDA(b->A,&blda));
2771   PetscCall(MatDenseGetLDA(c->A,&i));
2772   PetscCall(PetscBLASIntCast(i,&clda));
2773   /* copy transpose of B into buf[0] */
2774   bn      = B->rmap->n;
2775   sendbuf = abt->buf[0];
2776   recvbuf = abt->buf[1];
2777   for (k = 0, j = 0; j < bn; j++) {
2778     for (i = 0; i < cK; i++, k++) {
2779       sendbuf[k] = bv[i * blda + j];
2780     }
2781   }
2782   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2783   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2784   PetscCall(PetscBLASIntCast(cK,&ck));
2785   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2786   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2787   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2788   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2790   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2791   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2792   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2793   PetscFunctionReturn(0);
2794 }
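
/*
   Packing sketch (illustrative, not library code): the packing loop in the routine above stores
   the transpose of the local bn x cK block of B contiguously with leading dimension cK, so that
   MPI_Allgatherv concatenates the per-rank pieces into the full B^T and a single "N","N" GEMM
   computes A_loc * B^T.  The 2 x 3 block below is an assumption made only for this example.

      #include <stdio.h>
      int main(void)
      {
        enum {bn = 2, cK = 3, blda = 2};
        double bv[blda*cK] = {1,2, 3,4, 5,6};              // column-major: B_loc = [1 3 5; 2 4 6]
        double sendbuf[bn*cK];
        int    i,j,k = 0;

        for (j = 0; j < bn; j++) {                         // row j of B_loc becomes column j of B_loc^T
          for (i = 0; i < cK; i++) sendbuf[k++] = bv[i*blda + j];
        }
        for (k = 0; k < bn*cK; k++) printf("%g ",sendbuf[k]);  // prints: 1 3 5 2 4 6
        printf("\n");
        return 0;
      }
*/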
2795 
2796 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2797 {
2798   Mat_MatTransMultDense *abt;
2799 
2800   PetscFunctionBegin;
2801   MatCheckProduct(C,3);
2802   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2803   abt = (Mat_MatTransMultDense*)C->product->data;
2804   switch (abt->alg) {
2805   case 1:
2806     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2807     break;
2808   default:
2809     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2810     break;
2811   }
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2816 {
2817   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2818 
2819   PetscFunctionBegin;
2820   PetscCall(MatDestroy(&ab->Ce));
2821   PetscCall(MatDestroy(&ab->Ae));
2822   PetscCall(MatDestroy(&ab->Be));
2823   PetscCall(PetscFree(ab));
2824   PetscFunctionReturn(0);
2825 }
2826 
2827 #if defined(PETSC_HAVE_ELEMENTAL)
2828 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2829 {
2830   Mat_MatMultDense *ab;
2831 
2832   PetscFunctionBegin;
2833   MatCheckProduct(C,3);
2834   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2835   ab   = (Mat_MatMultDense*)C->product->data;
2836   PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2837   PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2838   PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2839   PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2844 {
2845   Mat              Ae,Be,Ce;
2846   Mat_MatMultDense *ab;
2847 
2848   PetscFunctionBegin;
2849   MatCheckProduct(C,4);
2850   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2851   /* check local size of A and B */
2852   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2853     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2854   }
2855 
2856   /* create elemental matrices Ae and Be */
2857   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2858   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2859   PetscCall(MatSetType(Ae,MATELEMENTAL));
2860   PetscCall(MatSetUp(Ae));
2861   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2862 
2863   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2864   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2865   PetscCall(MatSetType(Be,MATELEMENTAL));
2866   PetscCall(MatSetUp(Be));
2867   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2868 
2869   /* compute symbolic Ce = Ae*Be */
2870   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2871   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2872 
2873   /* setup C */
2874   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2875   PetscCall(MatSetType(C,MATDENSE));
2876   PetscCall(MatSetUp(C));
2877 
2878   /* create data structure for reuse of C */
2879   PetscCall(PetscNew(&ab));
2880   ab->Ae = Ae;
2881   ab->Be = Be;
2882   ab->Ce = Ce;
2883 
2884   C->product->data    = ab;
2885   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2886   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2887   PetscFunctionReturn(0);
2888 }
2889 #endif
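
/*
   Usage sketch (illustrative, not part of the library): as the #if guard above indicates, the
   C = A*B product of two MATMPIDENSE matrices is implemented here by round-tripping through
   MATELEMENTAL, so this path is only available when PETSc is configured with Elemental.  The
   sizes and the use of MatSetRandom() are assumptions made only for this example.

      Mat A,B,C;

      PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,40,NULL,&A));
      PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,40,60,NULL,&B));
      PetscCall(MatSetRandom(A,NULL));
      PetscCall(MatSetRandom(B,NULL));
      PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));  // 100 x 60 result
      PetscCall(MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));    // reuses ab->Ae, ab->Be, ab->Ce
      PetscCall(MatDestroy(&C));
      PetscCall(MatDestroy(&B));
      PetscCall(MatDestroy(&A));
*/
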
2890 /* ----------------------------------------------- */
2891 #if defined(PETSC_HAVE_ELEMENTAL)
2892 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2893 {
2894   PetscFunctionBegin;
2895   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2896   C->ops->productsymbolic = MatProductSymbolic_AB;
2897   PetscFunctionReturn(0);
2898 }
2899 #endif
2900 
2901 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2902 {
2903   Mat_Product *product = C->product;
2904   Mat         A = product->A,B=product->B;
2905 
2906   PetscFunctionBegin;
2907   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2908     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2909   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2910   C->ops->productsymbolic = MatProductSymbolic_AtB;
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2915 {
2916   Mat_Product    *product = C->product;
2917   const char     *algTypes[2] = {"allgatherv","cyclic"};
2918   PetscInt       alg,nalg = 2;
2919   PetscBool      flg = PETSC_FALSE;
2920 
2921   PetscFunctionBegin;
2922   /* Set default algorithm */
2923   alg = 0; /* default is allgatherv */
2924   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2925   if (flg) {
2926     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2927   }
2928 
2929   /* Get runtime option */
2930   if (product->api_user) {
2931     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2932     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2933     PetscOptionsEnd();
2934   } else {
2935     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2936     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2937     PetscOptionsEnd();
2938   }
2939   if (flg) {
2940     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2941   }
2942 
2943   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2944   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2945   PetscFunctionReturn(0);
2946 }
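
/*
   Usage sketch (illustrative, not part of the library): two equivalent ways to request the
   "cyclic" algorithm handled above, assuming A and B are existing MATMPIDENSE matrices with
   matching local column layouts.

   From the options database:
      -matmattransmult_mpidense_mpidense_via cyclic     (when using MatMatTransposeMult())
      -mat_product_algorithm cyclic                     (when using the MatProduct API)

   Through the MatProduct API:
      Mat C;

      PetscCall(MatProductCreate(A,B,NULL,&C));
      PetscCall(MatProductSetType(C,MATPRODUCT_ABt));
      PetscCall(MatProductSetAlgorithm(C,"cyclic"));
      PetscCall(MatProductSetFill(C,PETSC_DEFAULT));
      PetscCall(MatProductSetFromOptions(C));
      PetscCall(MatProductSymbolic(C));
      PetscCall(MatProductNumeric(C));
*/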
2947 
2948 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2949 {
2950   Mat_Product    *product = C->product;
2951 
2952   PetscFunctionBegin;
2953   switch (product->type) {
2954 #if defined(PETSC_HAVE_ELEMENTAL)
2955   case MATPRODUCT_AB:
2956     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2957     break;
2958 #endif
2959   case MATPRODUCT_AtB:
2960     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2961     break;
2962   case MATPRODUCT_ABt:
2963     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2964     break;
2965   default:
2966     break;
2967   }
2968   PetscFunctionReturn(0);
2969 }
2970