xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2b3cbbda7ef6bfb59aeed26278d0c91b4282b9fd)

/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix returns the sequential
              matrix that represents the locally owned portion of the operator.
              For a MATSEQDENSE matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

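/*
  A minimal usage sketch (assuming an already assembled parallel dense matrix A):
  the inner sequential matrix can be passed to any MATSEQDENSE routine, for
  example to query the leading dimension of the local storage.

    Mat      Aloc;
    PetscInt lda;

    PetscCall(MatDenseGetLocalMatrix(A,&Aloc));
    PetscCall(MatDenseGetLDA(Aloc,&lda));
*/
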
PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
  PetscScalar    *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  rend2 = PetscMin(rend,A->cmap->N);
  if (rend2>rstart) {
    for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
    PetscCall(PetscLogFlops(rend2-rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A,&v));
  PetscFunctionReturn(0);
}

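/*
  MatShift() computes A <- A + alpha*I: each rank stores its local A->rmap->n by
  A->cmap->N slab column-major with leading dimension lda, so the locally owned
  diagonal entry (j,j) sits at v[(j-rstart) + j*lda], exactly as indexed above.
  A minimal usage sketch, shifting an assembled matrix A:

    PetscCall(MatShift(A,2.0));
*/
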
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

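/*
  The cached "DiagonalBlock" aliases the local storage: with rstart the first
  global row owned by this rank and m the local row count, the diagonal block
  begins at column rstart of the local slab, i.e. at offset m*rstart in the
  column-major array handed to MatSeqDenseSetPreallocation() above. A usage
  sketch (assuming a square parallel dense matrix A with congruent layouts):

    Mat D;
    PetscCall(MatGetDiagonalBlock(A,&D));
*/
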
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

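/*
  Entries destined for rows owned by another rank are stashed (unless
  MAT_IGNORE_OFF_PROC_ENTRIES was set) and communicated during assembly, so the
  usual two-phase pattern applies. A minimal sketch, with grow and gcol standing
  in for arbitrary global indices:

    PetscCall(MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/
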
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrices with the same
     row partitioning as the original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}

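/*
  Because the column index set is allgathered, each rank receives every selected
  column restricted to its own selected rows; only submatrices that keep the
  original row partitioning are supported, as noted above. A minimal sketch
  (isrow and iscol stand in for caller-provided index sets):

    Mat B;
    PetscCall(MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B));
*/
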
PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

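/*
  A minimal usage sketch: zero the listed global rows of A, place 1.0 on their
  diagonals, and adjust the right-hand side b so the zeroed rows enforce the
  values already stored in x (rows, nrows, x and b stand in for caller data):

    PetscCall(MatZeroRows(A,nrows,rows,1.0,x,b));
*/
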
PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}

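/*
  y = A x proceeds in two steps: the PetscSF broadcast gathers the distributed
  entries of x into the local work vector lvec, then the sequential kernel
  applies this rank's row slab. The transpose variants below reverse the
  pattern, reducing (summing) local products back into the distributed result.
  A minimal sketch:

    Vec x,y;
    PetscCall(MatCreateVecs(A,&x,&y));
    PetscCall(VecSet(x,1.0));
    PetscCall(MatMult(A,x,y));
*/
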
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined (PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));

  PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_Dense_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

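/*
  A usage sketch: scale A in place as A <- diag(ll) A diag(rr), with ll laid out
  like the rows of A and rr like its columns; pass NULL for a side to skip it:

    PetscCall(MatDiagonalScale(A,ll,rr));
*/
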
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v    = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

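/*
  In parallel the reductions mirror the norm type: NORM_FROBENIUS sums |a_ij|^2
  locally and takes the square root after the MPI sum, NORM_1 allreduces the
  per-column absolute sums and then takes their maximum, and NORM_INFINITY
  maximizes the local row sums across ranks. A minimal sketch:

    PetscReal nrm;
    PetscCall(MatNorm(A,NORM_FROBENIUS,&nrm));
*/
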
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

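/*
  The transpose is assembled by inserting each local column of A as a row of B
  through MatSetValues(), hence the full assembly above. A minimal sketch
  forming B = A^T as a new matrix:

    Mat B;
    PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&B));
*/
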
static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

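/*
  The mean reductions are obtained by summing locally and dividing by the global
  row count after the allreduce, while NORM_2 squares the local column norms so
  the MPI sum runs over squares before the final square root. A usage sketch via
  the public column-norms interface (norms is assumed to hold one real per
  global column):

    PetscCall(MatGetColumnNorms(A,NORM_2,norms));
*/
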
#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift                   = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift                   = MatShift_MPIDense;
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_MPIDense,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
1405                                         NULL,
1406                                 /* 64*/ NULL,
1407                                         NULL,
1408                                         NULL,
1409                                         NULL,
1410                                         NULL,
1411                                 /* 69*/ NULL,
1412                                         NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                 /* 74*/ NULL,
1417                                         NULL,
1418                                         NULL,
1419                                         NULL,
1420                                         NULL,
1421                                 /* 79*/ NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /* 83*/ MatLoad_MPIDense,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                         NULL,
1431                                 /* 89*/ NULL,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                 /* 94*/ NULL,
1437                                         NULL,
1438                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1439                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1440                                         NULL,
1441                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1442                                         NULL,
1443                                         NULL,
1444                                         MatConjugate_MPIDense,
1445                                         NULL,
1446                                 /*104*/ NULL,
1447                                         MatRealPart_MPIDense,
1448                                         MatImaginaryPart_MPIDense,
1449                                         NULL,
1450                                         NULL,
1451                                 /*109*/ NULL,
1452                                         NULL,
1453                                         NULL,
1454                                         MatGetColumnVector_MPIDense,
1455                                         MatMissingDiagonal_MPIDense,
1456                                 /*114*/ NULL,
1457                                         NULL,
1458                                         NULL,
1459                                         NULL,
1460                                         NULL,
1461                                 /*119*/ NULL,
1462                                         NULL,
1463                                         NULL,
1464                                         NULL,
1465                                         NULL,
1466                                 /*124*/ NULL,
1467                                         MatGetColumnReductions_MPIDense,
1468                                         NULL,
1469                                         NULL,
1470                                         NULL,
1471                                 /*129*/ NULL,
1472                                         NULL,
1473                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1474                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1475                                         NULL,
1476                                 /*134*/ NULL,
1477                                         NULL,
1478                                         NULL,
1479                                         NULL,
1480                                         NULL,
1481                                 /*139*/ NULL,
1482                                         NULL,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1487                                 /*145*/ NULL,
1488                                         NULL,
1489                                         NULL
1490 };
1491 
1492 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1493 {
1494   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1495   PetscBool      iscuda;
1496 
1497   PetscFunctionBegin;
1498   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1499   PetscCall(PetscLayoutSetUp(mat->rmap));
1500   PetscCall(PetscLayoutSetUp(mat->cmap));
1501   if (!a->A) {
1502     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1503     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1504     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1505   }
1506   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1507   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1508   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1509   mat->preallocated = PETSC_TRUE;
1510   mat->assembled = PETSC_TRUE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1515 {
1516   Mat            B,C;
1517 
1518   PetscFunctionBegin;
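  /* flatten the local rows to a sequential dense matrix, then concatenate the per-rank pieces into the parallel result */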
1519   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1520   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1521   PetscCall(MatDestroy(&C));
1522   if (reuse == MAT_REUSE_MATRIX) {
1523     C = *newmat;
1524   } else C = NULL;
1525   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1526   PetscCall(MatDestroy(&B));
1527   if (reuse == MAT_INPLACE_MATRIX) {
1528     PetscCall(MatHeaderReplace(A,&C));
1529   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1534 {
1535   Mat            B,C;
1536 
1537   PetscFunctionBegin;
1538   PetscCall(MatDenseGetLocalMatrix(A,&C));
1539   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1540   if (reuse == MAT_REUSE_MATRIX) {
1541     C = *newmat;
1542   } else C = NULL;
1543   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1544   PetscCall(MatDestroy(&B));
1545   if (reuse == MAT_INPLACE_MATRIX) {
1546     PetscCall(MatHeaderReplace(A,&C));
1547   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #if defined(PETSC_HAVE_ELEMENTAL)
1552 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1553 {
1554   Mat            mat_elemental;
1555   PetscScalar    *v;
1556   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1557 
1558   PetscFunctionBegin;
1559   if (reuse == MAT_REUSE_MATRIX) {
1560     mat_elemental = *newmat;
1561     PetscCall(MatZeroEntries(*newmat));
1562   } else {
1563     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1564     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1565     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1566     PetscCall(MatSetUp(mat_elemental));
1567     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1568   }
1569 
1570   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1571   for (i=0; i<N; i++) cols[i] = i;
1572   for (i=0; i<m; i++) rows[i] = rstart + i;
1573 
  /* the PETSc-Elemental interface uses axpy to set off-process entries, so only ADD_VALUES is allowed */
1575   PetscCall(MatDenseGetArray(A,&v));
1576   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1577   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1578   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1579   PetscCall(MatDenseRestoreArray(A,&v));
1580   PetscCall(PetscFree2(rows,cols));
1581 
1582   if (reuse == MAT_INPLACE_MATRIX) {
1583     PetscCall(MatHeaderReplace(A,&mat_elemental));
1584   } else {
1585     *newmat = mat_elemental;
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 #endif
1590 
1591 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1592 {
1593   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1594 
1595   PetscFunctionBegin;
1596   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1601 {
1602   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1603 
1604   PetscFunctionBegin;
1605   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1610 {
1611   Mat_MPIDense   *mat;
1612   PetscInt       m,nloc,N;
1613 
1614   PetscFunctionBegin;
1615   PetscCall(MatGetSize(inmat,&m,&N));
1616   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1617   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1618     PetscInt sum;
1619 
1620     if (n == PETSC_DECIDE) {
1621       PetscCall(PetscSplitOwnership(comm,&n,&N));
1622     }
1623     /* Check sum(n) = N */
1624     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1625     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1626 
1627     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1628     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1629   }
1630 
1631   /* numeric phase */
1632   mat = (Mat_MPIDense*)(*outmat)->data;
1633   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #if defined(PETSC_HAVE_CUDA)
1638 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1639 {
1640   Mat            B;
1641   Mat_MPIDense   *m;
1642 
1643   PetscFunctionBegin;
1644   if (reuse == MAT_INITIAL_MATRIX) {
1645     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1646   } else if (reuse == MAT_REUSE_MATRIX) {
1647     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1648   }
1649 
1650   B    = *newmat;
1651   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1652   PetscCall(PetscFree(B->defaultvectype));
1653   PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1654   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1655   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1656   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1657   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1658   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1659   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1660   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1661   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1662   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1663   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1664   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1666   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1667   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1669   m    = (Mat_MPIDense*)(B)->data;
1670   if (m->A) {
1671     PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1672   }
1673   B->ops->bindtocpu = NULL;
1674   B->offloadmask    = PETSC_OFFLOAD_CPU;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1679 {
1680   Mat            B;
1681   Mat_MPIDense   *m;
1682 
1683   PetscFunctionBegin;
1684   if (reuse == MAT_INITIAL_MATRIX) {
1685     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1686   } else if (reuse == MAT_REUSE_MATRIX) {
1687     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1688   }
1689 
1690   B    = *newmat;
1691   PetscCall(PetscFree(B->defaultvectype));
1692   PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1693   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1694   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1695   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1696   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1697   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1698   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1699   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1700   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1701   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1702   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1703   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1704   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1705   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1706   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1707   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1708   m    = (Mat_MPIDense*)(B->data);
1709   if (m->A) {
1710     PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1711     B->offloadmask = PETSC_OFFLOAD_BOTH;
1712   } else {
1713     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1714   }
1715   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1716 
1717   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1718   PetscFunctionReturn(0);
1719 }
1720 #endif
1721 
1722 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1723 {
1724   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1725   PetscInt       lda;
1726 
1727   PetscFunctionBegin;
1728   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1729   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
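  /* lazily create a vector that borrows the matrix storage; the column pointer is placed below on every call */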
1730   if (!a->cvec) {
1731     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1732   }
1733   a->vecinuse = col + 1;
1734   PetscCall(MatDenseGetLDA(a->A,&lda));
1735   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1736   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1737   *v   = a->cvec;
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1742 {
1743   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1744 
1745   PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1748   a->vecinuse = 0;
1749   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1750   PetscCall(VecResetArray(a->cvec));
1751   *v   = NULL;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1756 {
1757   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1758   PetscInt       lda;
1759 
1760   PetscFunctionBegin;
1761   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1762   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1763   if (!a->cvec) {
1764     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1765   }
1766   a->vecinuse = col + 1;
1767   PetscCall(MatDenseGetLDA(a->A,&lda));
1768   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1769   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1770   PetscCall(VecLockReadPush(a->cvec));
1771   *v   = a->cvec;
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1776 {
1777   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1778 
1779   PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
1781   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1782   a->vecinuse = 0;
1783   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1784   PetscCall(VecLockReadPop(a->cvec));
1785   PetscCall(VecResetArray(a->cvec));
1786   *v   = NULL;
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1791 {
1792   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1793   PetscInt       lda;
1794 
1795   PetscFunctionBegin;
1796   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1797   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1798   if (!a->cvec) {
1799     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1800   }
1801   a->vecinuse = col + 1;
1802   PetscCall(MatDenseGetLDA(a->A,&lda));
1803   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1804   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1805   *v   = a->cvec;
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1810 {
1811   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1812 
1813   PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
1815   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1816   a->vecinuse = 0;
1817   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1818   PetscCall(VecResetArray(a->cvec));
1819   *v   = NULL;
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1824 {
1825   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1826   Mat_MPIDense *c;
1827   MPI_Comm     comm;
1828 
1829   PetscFunctionBegin;
1830   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1831   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1832   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
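  /* cache a single sub-matrix wrapper; only its column layout is rebuilt when the requested width changes */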
1833   if (!a->cmat) {
1834     PetscCall(MatCreate(comm,&a->cmat));
1835     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1836     PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
1837     PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1838     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1839     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1840   } else if (cend-cbegin != a->cmat->cmap->N) {
1841     PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1842     PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
1843     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1844     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1845   }
1846   c = (Mat_MPIDense*)a->cmat->data;
1847   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1848   PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
1849   a->cmat->preallocated = PETSC_TRUE;
1850   a->cmat->assembled = PETSC_TRUE;
1851   a->matinuse = cbegin + 1;
1852   *v = a->cmat;
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1857 {
1858   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1859   Mat_MPIDense *c;
1860 
1861   PetscFunctionBegin;
1862   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1863   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1864   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1865   a->matinuse = 0;
1866   c = (Mat_MPIDense*)a->cmat->data;
1867   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1868   *v = NULL;
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 /*MC
1873    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1874 
1875    Options Database Keys:
1876 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1877 
1878   Level: beginner
1879 
1880 .seealso: `MatCreateDense()`
1881 
1882 M*/
1883 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1884 {
1885   Mat_MPIDense   *a;
1886 
1887   PetscFunctionBegin;
1888   PetscCall(PetscNewLog(mat,&a));
1889   mat->data = (void*)a;
1890   PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1891 
1892   mat->insertmode = NOT_SET_VALUES;
1893 
  /* build a cache for off-process entries generated during assembly */
1895   a->donotstash = PETSC_FALSE;
1896 
1897   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1898 
  /* data used for the parallel matrix-vector multiply */
1900   a->lvec        = NULL;
1901   a->Mvctx       = NULL;
1902   a->roworiented = PETSC_TRUE;
1903 
1904   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1905   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1906   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1907   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1908   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1909   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1910   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1911   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1912   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1913   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1914   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1915   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1916   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1917   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1918   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1919   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1920   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1921   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1922   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1923   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1924   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1925 #if defined(PETSC_HAVE_ELEMENTAL)
1926   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1927 #endif
1928 #if defined(PETSC_HAVE_SCALAPACK)
1929   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1930 #endif
1931 #if defined(PETSC_HAVE_CUDA)
1932   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1933 #endif
1934   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1935   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1936   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1937 #if defined(PETSC_HAVE_CUDA)
1938   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1939   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1940 #endif
1941 
1942   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1943   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1944   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 /*MC
1949    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1950 
1951    Options Database Keys:
1952 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1953 
1954   Level: beginner
1955 
.seealso: `MATMPIDENSE`, `MATSEQDENSECUDA`, `MATDENSECUDA`
1957 
1958 M*/
1959 #if defined(PETSC_HAVE_CUDA)
1960 #include <petsc/private/deviceimpl.h>
1961 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1962 {
1963   PetscFunctionBegin;
1964   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
1965   PetscCall(MatCreate_MPIDense(B));
1966   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
1967   PetscFunctionReturn(0);
1968 }
1969 #endif
1970 
1971 /*MC
1972    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1973 
1974    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1975    and MATMPIDENSE otherwise.
1976 
1977    Options Database Keys:
1978 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1979 
1980   Level: beginner
1981 
1982 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1983 M*/
1984 
1985 /*MC
1986    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1987 
1988    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1989    and MATMPIDENSECUDA otherwise.
1990 
1991    Options Database Keys:
1992 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1993 
1994   Level: beginner
1995 
1996 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1997 M*/
1998 
1999 /*@C
2000    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2001 
2002    Collective
2003 
2004    Input Parameters:
+  B - the matrix
2006 -  data - optional location of matrix data.  Set data=NULL for PETSc
2007    to control all matrix memory allocation.
2008 
2009    Notes:
2010    The dense format is fully compatible with standard Fortran 77
2011    storage by columns.
2012 
2013    The data input variable is intended primarily for Fortran programmers
2014    who wish to allocate their own matrix memory space.  Most users should
2015    set data=NULL.
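
   A minimal usage sketch (comm and the sizes m, n, M, N are placeholders), letting PETSc allocate the storage:
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,M,N);
   MatSetType(B,MATMPIDENSE);
   MatMPIDenseSetPreallocation(B,NULL);
.ve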
2016 
2017    Level: intermediate
2018 
2019 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2020 @*/
2021 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
2022 {
2023   PetscFunctionBegin;
2024   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2025   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 /*@
2030    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2031    array provided by the user. This is useful to avoid copying an array
   into a matrix.
2033 
2034    Not Collective
2035 
2036    Input Parameters:
2037 +  mat - the matrix
2038 -  array - the array in column major order
2039 
2040    Notes:
2041    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2042    freed when the matrix is destroyed.
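
   A minimal sketch of the place/reset pairing (array, x, and y are placeholders; array must be a column-major buffer sized like the local storage):
.vb
   MatDensePlaceArray(mat,array);
   MatMult(mat,x,y);
   MatDenseResetArray(mat);
.ve
   After MatDenseResetArray() the matrix again uses its original storage.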
2043 
2044    Level: developer
2045 
2046 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2047 
2048 @*/
2049 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2050 {
2051   PetscFunctionBegin;
2052   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2053   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2054   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2055 #if defined(PETSC_HAVE_CUDA)
2056   mat->offloadmask = PETSC_OFFLOAD_CPU;
2057 #endif
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 /*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
2063 
2064    Not Collective
2065 
   Input Parameter:
2067 .  mat - the matrix
2068 
2069    Notes:
2070    You can only call this after a call to MatDensePlaceArray()
2071 
2072    Level: developer
2073 
2074 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2075 
2076 @*/
2077 PetscErrorCode  MatDenseResetArray(Mat mat)
2078 {
2079   PetscFunctionBegin;
2080   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2081   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
2082   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 /*@
2087    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2088    array provided by the user. This is useful to avoid copying an array
   into a matrix.
2090 
2091    Not Collective
2092 
2093    Input Parameters:
2094 +  mat - the matrix
2095 -  array - the array in column major order
2096 
2097    Notes:
2098    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2099    freed by the user. It will be freed when the matrix is destroyed.
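
   A minimal sketch, assuming lda*N scalars is the size of the local storage:
.vb
   PetscScalar *array;
   PetscMalloc1(lda*N,&array);
   MatDenseReplaceArray(mat,array);
.ve
   Ownership of array passes to the matrix, so it must not be freed by the caller.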
2100 
2101    Level: developer
2102 
2103 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2104 @*/
2105 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2106 {
2107   PetscFunctionBegin;
2108   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2109   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2110   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2111 #if defined(PETSC_HAVE_CUDA)
2112   mat->offloadmask = PETSC_OFFLOAD_CPU;
2113 #endif
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 #if defined(PETSC_HAVE_CUDA)
2118 /*@C
2119    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2120    array provided by the user. This is useful to avoid copying an array
   into a matrix.
2122 
2123    Not Collective
2124 
2125    Input Parameters:
2126 +  mat - the matrix
2127 -  array - the array in column major order
2128 
2129    Notes:
2130    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2131    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
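
   A minimal sketch (bytes is a placeholder for the size of the local column-major storage):
.vb
   PetscScalar *d_array;
   cudaMalloc((void**)&d_array,bytes);
   MatDenseCUDAPlaceArray(mat,d_array);
   MatDenseCUDAResetArray(mat);
   cudaFree(d_array);
.ve
   The caller retains ownership of the placed array and frees it after resetting.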
2132 
2133    Level: developer
2134 
2135 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2136 @*/
2137 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2138 {
2139   PetscFunctionBegin;
2140   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2141   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2142   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2143   mat->offloadmask = PETSC_OFFLOAD_GPU;
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2149 
2150    Not Collective
2151 
   Input Parameter:
2153 .  mat - the matrix
2154 
2155    Notes:
2156    You can only call this after a call to MatDenseCUDAPlaceArray()
2157 
2158    Level: developer
2159 
2160 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2161 
2162 @*/
2163 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2164 {
2165   PetscFunctionBegin;
2166   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2167   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2168   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 /*@C
2173    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2174    array provided by the user. This is useful to avoid copying an array
   into a matrix.
2176 
2177    Not Collective
2178 
2179    Input Parameters:
2180 +  mat - the matrix
2181 -  array - the array in column major order
2182 
2183    Notes:
2184    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2185    The memory passed in CANNOT be freed by the user. It will be freed
2186    when the matrix is destroyed. The array should respect the matrix leading dimension.
2187 
2188    Level: developer
2189 
2190 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2191 @*/
2192 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2193 {
2194   PetscFunctionBegin;
2195   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2196   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2197   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2198   mat->offloadmask = PETSC_OFFLOAD_GPU;
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 /*@C
2203    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2204 
2205    Not Collective
2206 
   Input Parameter:
2208 .  A - the matrix
2209 
   Output Parameter:
2211 .  array - the GPU array in column major order
2212 
2213    Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray().
   The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
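
   A minimal sketch of the write/restore pairing; the buffer is filled on the device in between, e.g. with a CUDA kernel or cudaMemcpy():
.vb
   PetscScalar *a;
   MatDenseCUDAGetArrayWrite(A,&a);
   MatDenseCUDARestoreArrayWrite(A,&a);
.ve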
2215 
2216    Level: developer
2217 
2218 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2219 @*/
2220 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2221 {
2222   PetscFunctionBegin;
2223   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2224   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2225   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 /*@C
2230    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2231 
2232    Not Collective
2233 
2234    Input Parameters:
2235 +  A - the matrix
2236 -  array - the GPU array in column major order
2237 
   Notes:
   After this call the device data is considered current; the offload mask is set to PETSC_OFFLOAD_GPU.
2239 
2240    Level: developer
2241 
2242 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2243 @*/
2244 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2245 {
2246   PetscFunctionBegin;
2247   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2248   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2249   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2250   A->offloadmask = PETSC_OFFLOAD_GPU;
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 /*@C
2255    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2256 
2257    Not Collective
2258 
   Input Parameter:
2260 .  A - the matrix
2261 
   Output Parameter:
2263 .  array - the GPU array in column major order
2264 
2265    Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
2267 
2268    Level: developer
2269 
2270 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2271 @*/
2272 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2273 {
2274   PetscFunctionBegin;
2275   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2276   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 /*@C
2281    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2282 
2283    Not Collective
2284 
2285    Input Parameters:
2286 +  A - the matrix
2287 -  array - the GPU array in column major order
2288 
2289    Notes:
   The array pointer obtained with MatDenseCUDAGetArrayRead() must not be used after this call.
2291 
2292    Level: developer
2293 
2294 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2295 @*/
2296 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2297 {
2298   PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 /*@C
2304    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2305 
2306    Not Collective
2307 
   Input Parameter:
2309 .  A - the matrix
2310 
   Output Parameter:
2312 .  array - the GPU array in column major order
2313 
2314    Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
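
   A minimal sketch of the get/restore pairing; the device values may be read or modified through the pointer in between:
.vb
   PetscScalar *a;
   MatDenseCUDAGetArray(A,&a);
   MatDenseCUDARestoreArray(A,&a);
.ve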
2316 
2317    Level: developer
2318 
2319 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2320 @*/
2321 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2322 {
2323   PetscFunctionBegin;
2324   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2325   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2326   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2327   PetscFunctionReturn(0);
2328 }
2329 
2330 /*@C
2331    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2332 
2333    Not Collective
2334 
2335    Input Parameters:
2336 +  A - the matrix
2337 -  array - the GPU array in column major order
2338 
   Notes:
   After this call the device data is considered current; the offload mask is set to PETSC_OFFLOAD_GPU.
2340 
2341    Level: developer
2342 
2343 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2344 @*/
2345 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2346 {
2347   PetscFunctionBegin;
2348   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2349   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2350   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2351   A->offloadmask = PETSC_OFFLOAD_GPU;
2352   PetscFunctionReturn(0);
2353 }
2354 #endif
2355 
2356 /*@C
2357    MatCreateDense - Creates a matrix in dense format.
2358 
2359    Collective
2360 
2361    Input Parameters:
2362 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2367 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2368    to control all matrix memory allocation.
2369 
2370    Output Parameter:
2371 .  A - the matrix
2372 
2373    Notes:
2374    The dense format is fully compatible with standard Fortran 77
2375    storage by columns.
2376    Note that, although local portions of the matrix are stored in column-major
2377    order, the matrix is partitioned across MPI ranks by row.
2378 
2379    The data input variable is intended primarily for Fortran programmers
2380    who wish to allocate their own matrix memory space.  Most users should
2381    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2382 
2383    The user MUST specify either the local or global matrix dimensions
2384    (possibly both).
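
   A minimal sketch (M and N are placeholder global sizes), letting PETSc choose the row distribution and allocate the storage:
.vb
   Mat A;
   MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
.ve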
2385 
2386    Level: intermediate
2387 
2388 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2389 @*/
2390 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2391 {
2392   PetscFunctionBegin;
2393   PetscCall(MatCreate(comm,A));
2394   PetscCall(MatSetSizes(*A,m,n,M,N));
2395   PetscCall(MatSetType(*A,MATDENSE));
2396   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2397   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #if defined(PETSC_HAVE_CUDA)
2402 /*@C
2403    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2404 
2405    Collective
2406 
2407    Input Parameters:
2408 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2413 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2414    to control matrix memory allocation.
2415 
2416    Output Parameter:
2417 .  A - the matrix
2418 
   Notes:
   If data is provided, it must point to GPU memory holding the matrix entries in column major order; set data=NULL to have PETSc allocate the storage.
2420 
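   A minimal sketch (M and N are placeholder global sizes), letting PETSc allocate the device storage:
.vb
   Mat A;
   MatCreateDenseCUDA(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
.ve
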
2421    Level: intermediate
2422 
2423 .seealso: `MatCreate()`, `MatCreateDense()`
2424 @*/
2425 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2426 {
2427   PetscFunctionBegin;
2428   PetscCall(MatCreate(comm,A));
2429   PetscValidLogicalCollectiveBool(*A,!!data,6);
2430   PetscCall(MatSetSizes(*A,m,n,M,N));
2431   PetscCall(MatSetType(*A,MATDENSECUDA));
2432   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2433   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2434   PetscFunctionReturn(0);
2435 }
2436 #endif
2437 
2438 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2439 {
2440   Mat            mat;
2441   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2442 
2443   PetscFunctionBegin;
2444   *newmat = NULL;
2445   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2446   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2447   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2448   a       = (Mat_MPIDense*)mat->data;
2449 
2450   mat->factortype   = A->factortype;
2451   mat->assembled    = PETSC_TRUE;
2452   mat->preallocated = PETSC_TRUE;
2453 
2454   mat->insertmode = NOT_SET_VALUES;
2455   a->donotstash   = oldmat->donotstash;
2456 
2457   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2458   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2459 
2460   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2461   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2462 
2463   *newmat = mat;
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2468 {
2469   PetscBool      isbinary;
2470 #if defined(PETSC_HAVE_HDF5)
2471   PetscBool      ishdf5;
2472 #endif
2473 
2474   PetscFunctionBegin;
2475   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2476   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2477   /* force binary viewer to load .info file if it has not yet done so */
2478   PetscCall(PetscViewerSetUp(viewer));
2479   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2480 #if defined(PETSC_HAVE_HDF5)
2481   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2482 #endif
2483   if (isbinary) {
2484     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2485 #if defined(PETSC_HAVE_HDF5)
2486   } else if (ishdf5) {
2487     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2488 #endif
2489   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2494 {
2495   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2496   Mat            a,b;
2497 
2498   PetscFunctionBegin;
2499   a    = matA->A;
2500   b    = matB->A;
2501   PetscCall(MatEqual(a,b,flag));
2502   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2507 {
2508   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2509 
2510   PetscFunctionBegin;
2511   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2512   PetscCall(MatDestroy(&atb->atb));
2513   PetscCall(PetscFree(atb));
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2518 {
2519   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2520 
2521   PetscFunctionBegin;
2522   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2523   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2524   PetscCall(PetscFree(abt));
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2529 {
2530   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2531   Mat_TransMatMultDense *atb;
2532   MPI_Comm              comm;
2533   PetscMPIInt           size,*recvcounts;
2534   PetscScalar           *carray,*sendbuf;
2535   const PetscScalar     *atbarray;
2536   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2537   const PetscInt        *ranges;
2538 
2539   PetscFunctionBegin;
2540   MatCheckProduct(C,3);
2541   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2542   atb = (Mat_TransMatMultDense *)C->product->data;
2543   recvcounts = atb->recvcounts;
2544   sendbuf = atb->sendbuf;
2545 
2546   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2547   PetscCallMPI(MPI_Comm_size(comm,&size));
2548 
2549   /* compute atbarray = aseq^T * bseq */
2550   PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2551 
2552   PetscCall(MatGetOwnershipRanges(C,&ranges));
2553 
2554   /* arrange atbarray into sendbuf */
2555   PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
2556   PetscCall(MatDenseGetLDA(atb->atb,&lda));
2557   for (proc=0, k=0; proc<size; proc++) {
2558     for (j=0; j<cN; j++) {
2559       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2560     }
2561   }
2562   PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2563 
2564   /* sum all atbarray to local values of C */
2565   PetscCall(MatDenseGetArrayWrite(c->A,&carray));
2566   PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2567   PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
2568   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2569   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2574 {
2575   MPI_Comm              comm;
2576   PetscMPIInt           size;
2577   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2578   Mat_TransMatMultDense *atb;
2579   PetscBool             cisdense;
2580   PetscInt              i;
2581   const PetscInt        *ranges;
2582 
2583   PetscFunctionBegin;
2584   MatCheckProduct(C,3);
2585   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2586   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2587   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2589   }
2590 
2591   /* create matrix product C */
2592   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2593   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2594   if (!cisdense) {
2595     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2596   }
2597   PetscCall(MatSetUp(C));
2598 
2599   /* create data structure for reuse C */
2600   PetscCallMPI(MPI_Comm_size(comm,&size));
2601   PetscCall(PetscNew(&atb));
2602   cM   = C->rmap->N;
2603   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2604   PetscCall(MatGetOwnershipRanges(C,&ranges));
2605   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2606 
2607   C->product->data    = atb;
2608   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2609   PetscFunctionReturn(0);
2610 }
2611 
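/*
   A minimal usage sketch for the C = A^T * B path set up above
   (hypothetical example, not part of this file; assumes A and B are
   assembled MATMPIDENSE matrices with matching row layouts):

     Mat C;
     PetscCall(MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     // ... refill A and B, keeping the same layouts, then reuse C:
     PetscCall(MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
     PetscCall(MatDestroy(&C));
*/
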
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check that the local column layouts of A and B match */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg  = flg ? 0 : 1;

  /* set up the product matrix C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create the data structure used to reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

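/*
   The algorithm chosen above can also be selected at run time.  A hedged
   usage sketch (hypothetical, not part of this file):

     Mat C;
     PetscCall(MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));

   run with

     -matmattransmult_mpidense_mpidense_via cyclic

   or, through the MatProduct interface,

     -mat_product_algorithm cyclic

   to use the ring algorithm instead of the default allgatherv.
*/
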
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn   = B->rmap->n;
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, so both bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process the next recvbuf originates on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic shift: send sendbuf on, receive into recvbuf (which becomes the next sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* compute the local block aseq * sendbuf^T while the shift is in flight */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the send and receive to complete, then swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

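/*
   The loop above overlaps the local GEMM on the current block with a
   one-step ring shift of the buffers.  The bare communication pattern,
   as a standalone sketch (hypothetical, not part of PETSc; rank, size,
   comm, tag, n, and the two buffers are assumed from the surrounding
   context):

     // each rank sends its buffer to rank-1 and receives from rank+1
     int sendto   = (rank + size - 1) % size;
     int recvfrom = (rank + 1) % size;
     MPI_Request reqs[2];
     MPI_Isend(sendbuf, n, MPI_DOUBLE, sendto,   tag, comm, &reqs[0]);
     MPI_Irecv(recvbuf, n, MPI_DOUBLE, recvfrom, tag, comm, &reqs[1]);
     // ... compute with sendbuf while the shift is in flight ...
     MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
     // the block just received becomes the block sent in the next step
*/
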
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  /* copy the transpose of the local rows of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bv[i * blda + j];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
  if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

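/*
   Packing the local rows of B transposed means the Allgatherv
   concatenation yields all of B^T in column-major order with leading
   dimension cK, ready for a single "N","N" GEMM.  The collective itself,
   as a standalone sketch (hypothetical, not part of PETSc), with the
   counts and displacements precomputed in the symbolic phase as
   range[i]*cK:

     MPI_Allgatherv(sendbuf, bn * cK, MPI_DOUBLE,
                    recvbuf, recvcounts, recvdispls, MPI_DOUBLE, comm);
*/
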
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
    break;
  default:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
    break;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
  PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check that the local column layout of A matches the local row layout of B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(Ae,MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
  PetscCall(MatSetType(Be,MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));

  /* set up C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATDENSE));
  PetscCall(MatSetUp(C));

  /* create the data structure used to reuse C */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data    = ab;
  C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif
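
/*
   With Elemental enabled, an ordinary MatMatMult() on two MATMPIDENSE
   matrices can go through the conversion path above.  A hedged usage
   sketch (hypothetical, not part of this file):

     Mat C;
     PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     PetscCall(MatDestroy(&C));
*/
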
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg,"default",&flg));
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  }
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

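/*
   The api_user branch above serves the MatMatTransposeMult() interface;
   the other branch serves the MatProduct interface.  A minimal
   MatProduct usage sketch (hypothetical, not part of this file):

     Mat C;
     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C,"cyclic")); // or "allgatherv"
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/
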
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
2972