xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2f605a994c568b341eedf7d10867052a20765ec2)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
56   PetscScalar    *v;
57 
58   PetscFunctionBegin;
59   PetscCall(MatDenseGetArray(mat->A,&v));
60   PetscCall(MatDenseGetLDA(mat->A,&lda));
61   rend2 = PetscMin(rend,A->cmap->N);
62   if (rend2>rstart) {
63     for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
64     PetscCall(PetscLogFlops(rend2-rstart));
65   }
66   PetscCall(MatDenseRestoreArray(mat->A,&v));
67   PetscFunctionReturn(0);
68 }
69 
70 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
71 {
72   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
73   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
74 
75   PetscFunctionBegin;
76   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
77   lrow = row - rstart;
78   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
83 {
84   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
85   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
86 
87   PetscFunctionBegin;
88   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
89   lrow = row - rstart;
90   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
91   PetscFunctionReturn(0);
92 }
93 
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  /* Return a sequential dense matrix that aliases the storage of the local
     diagonal block of A.  The block is created once and cached on A under the
     key "DiagonalBlock"; subsequent calls return the cached matrix. */
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    /* Columns are stored globally in the local block, so the diagonal block starts
       at column rstart.  NOTE(review): array+m*rstart assumes the leading dimension
       equals m -- confirm behavior when a non-default LDA has been set. */
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    /* Compose takes a reference; drop ours so A becomes the sole owner of the cache */
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
124 
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  /* Insert or add an m-by-n block of values.  Locally owned rows go directly to
     the inner sequential matrix; off-process rows are queued in the stash until
     MatAssemblyBegin/End (unless stashing was disabled). */
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row index: silently skipped */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* convert to local row number */
      if (roworiented) {
        /* v is row-major: row i of the block starts at v+i*n */
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        /* v is column-major: entry (i,j) of the block is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it for communication during assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
157 
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  /* Fetch the m-by-n block of entries into v (row-major).  Only rows owned by
     this rank may be requested; any other row is an error. */
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
178 
179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
180 {
181   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
182 
183   PetscFunctionBegin;
184   PetscCall(MatDenseGetLDA(a->A,lda));
185   PetscFunctionReturn(0);
186 }
187 
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  /* The inner sequential matrix may not exist yet (preallocation not called);
     create it here so the leading dimension can be recorded before the values
     are allocated. */
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    /* The local block stores all N global columns of the locally owned rows */
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    /* Match the device flavor of the outer matrix */
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}
207 
208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211 
212   PetscFunctionBegin;
213   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
214   PetscCall(MatDenseGetArray(a->A,array));
215   PetscFunctionReturn(0);
216 }
217 
218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
219 {
220   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
221 
222   PetscFunctionBegin;
223   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
224   PetscCall(MatDenseGetArrayRead(a->A,array));
225   PetscFunctionReturn(0);
226 }
227 
228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
229 {
230   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
231 
232   PetscFunctionBegin;
233   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
234   PetscCall(MatDenseGetArrayWrite(a->A,array));
235   PetscFunctionReturn(0);
236 }
237 
238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
244   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
245   PetscCall(MatDensePlaceArray(a->A,array));
246   PetscFunctionReturn(0);
247 }
248 
249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
250 {
251   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
252 
253   PetscFunctionBegin;
254   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
255   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
256   PetscCall(MatDenseResetArray(a->A));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
261 {
262   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
263 
264   PetscFunctionBegin;
265   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
266   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
267   PetscCall(MatDenseReplaceArray(a->A,array));
268   PetscFunctionReturn(0);
269 }
270 
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  /* Extract the submatrix A(isrow,iscol).  Rows must follow the original row
     partitioning; the column index set is gathered onto every rank. */
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    /* source column icol[i] of the local block (columns are stored globally) */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343 
344   PetscFunctionBegin;
345   PetscCall(MatDenseRestoreArray(a->A,array));
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352 
353   PetscFunctionBegin;
354   PetscCall(MatDenseRestoreArrayRead(a->A,array));
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
359 {
360   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
364   PetscFunctionReturn(0);
365 }
366 
367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
368 {
369   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
370   PetscInt       nstash,reallocs;
371 
372   PetscFunctionBegin;
373   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
374 
375   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
376   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
377   PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
378   PetscFunctionReturn(0);
379 }
380 
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  /* Drain the stash: receive the off-process values queued by other ranks
     during MatSetValues and insert them into the local block. */
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* Finish assembly of the per-rank sequential block */
  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}
414 
415 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
416 {
417   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
418 
419   PetscFunctionBegin;
420   PetscCall(MatZeroEntries(l->A));
421   PetscFunctionReturn(0);
422 }
423 
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed: b[r] = diag * x[r] for each zeroed local row r */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the local rows without touching the diagonal */
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this inserts diag into EVERY diagonal entry, not only the
       zeroed rows -- confirm that this matches the intended MatZeroRows
       semantics for dense matrices. */
    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
462 {
463   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
464   const PetscScalar *ax;
465   PetscScalar       *ay;
466   PetscMemType      axmtype,aymtype;
467 
468   PetscFunctionBegin;
469   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
470   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
471   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
472   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
473   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
474   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
475   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
476   PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
477   PetscFunctionReturn(0);
478 }
479 
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* zz = yy + A*xx: broadcast xx into the local work vector lvec, then run the
     sequential multadd on the local block. */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); /* lazily build the communication context */
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}
498 
499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
500 {
501   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
502   const PetscScalar *ax;
503   PetscScalar       *ay;
504   PetscMemType      axmtype,aymtype;
505 
506   PetscFunctionBegin;
507   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
508   PetscCall(VecSet(yy,0.0));
509   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
510   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
511   PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
512   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
513   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
514   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
515   PetscCall(VecRestoreArrayAndMemType(yy,&ay));
516   PetscFunctionReturn(0);
517 }
518 
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* zz = yy + A^T*xx: apply the local transposed block into lvec, then reduce
     (sum) the per-rank partial results into zz, which starts as a copy of yy. */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazily build the communication context */
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
542   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
543   PetscScalar       *x,zero = 0.0;
544   const PetscScalar *av;
545 
546   PetscFunctionBegin;
547   PetscCall(VecSet(v,zero));
548   PetscCall(VecGetArray(v,&x));
549   PetscCall(VecGetSize(v,&n));
550   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
551   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
552   radd = A->rmap->rstart*m;
553   PetscCall(MatDenseGetArrayRead(a->A,&av));
554   PetscCall(MatDenseGetLDA(a->A,&lda));
555   for (i=0; i<len; i++) {
556     x[i] = av[radd + i*lda + i];
557   }
558   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
559   PetscCall(VecRestoreArray(v,&x));
560   PetscFunctionReturn(0);
561 }
562 
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* Destroying the matrix while a column-vector or submatrix view is outstanding is a usage error */
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* Release the inner sequential block and the communication work objects */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  /* Remove every composed method installed at creation/convert time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined (PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
  PetscFunctionReturn(0);
}
621 
622 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
623 
624 #include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* View for ASCII, draw and socket targets.  INFO formats print per-rank
     summaries and return; all other formats gather the whole matrix onto
     rank 0 and view it there through the sequential dense viewer. */
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype)); /* NOTE(review): vtype is fetched but never used */
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N)); /* rank 0 owns everything */
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    /* Only rank 0 has data; view its gathered sequential block */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
706 
707 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
708 {
709   PetscBool      iascii,isbinary,isdraw,issocket;
710 
711   PetscFunctionBegin;
712   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
713   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
714   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
715   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
716 
717   if (iascii || issocket || isdraw) {
718     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
719   } else if (isbinary) {
720     PetscCall(MatView_Dense_Binary(mat,viewer));
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
726 {
727   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
728   Mat            mdn  = mat->A;
729   PetscLogDouble isend[5],irecv[5];
730 
731   PetscFunctionBegin;
732   info->block_size = 1.0;
733 
734   PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));
735 
736   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
737   isend[3] = info->memory;  isend[4] = info->mallocs;
738   if (flag == MAT_LOCAL) {
739     info->nz_used      = isend[0];
740     info->nz_allocated = isend[1];
741     info->nz_unneeded  = isend[2];
742     info->memory       = isend[3];
743     info->mallocs      = isend[4];
744   } else if (flag == MAT_GLOBAL_MAX) {
745     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
746 
747     info->nz_used      = irecv[0];
748     info->nz_allocated = irecv[1];
749     info->nz_unneeded  = irecv[2];
750     info->memory       = irecv[3];
751     info->mallocs      = irecv[4];
752   } else if (flag == MAT_GLOBAL_SUM) {
753     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
754 
755     info->nz_used      = irecv[0];
756     info->nz_allocated = irecv[1];
757     info->nz_unneeded  = irecv[2];
758     info->memory       = irecv[3];
759     info->mallocs      = irecv[4];
760   }
761   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
762   info->fill_ratio_needed = 0;
763   info->factor_mallocs    = 0;
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
768 {
769   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
770 
771   PetscFunctionBegin;
772   switch (op) {
773   case MAT_NEW_NONZERO_LOCATIONS:
774   case MAT_NEW_NONZERO_LOCATION_ERR:
775   case MAT_NEW_NONZERO_ALLOCATION_ERR:
776     MatCheckPreallocated(A,1);
777     PetscCall(MatSetOption(a->A,op,flg));
778     break;
779   case MAT_ROW_ORIENTED:
780     MatCheckPreallocated(A,1);
781     a->roworiented = flg;
782     PetscCall(MatSetOption(a->A,op,flg));
783     break;
784   case MAT_FORCE_DIAGONAL_ENTRIES:
785   case MAT_KEEP_NONZERO_PATTERN:
786   case MAT_USE_HASH_TABLE:
787   case MAT_SORTED_FULL:
788     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
789     break;
790   case MAT_IGNORE_OFF_PROC_ENTRIES:
791     a->donotstash = flg;
792     break;
793   case MAT_SYMMETRIC:
794   case MAT_STRUCTURALLY_SYMMETRIC:
795   case MAT_HERMITIAN:
796   case MAT_SYMMETRY_ETERNAL:
797   case MAT_IGNORE_LOWER_TRIANGULAR:
798   case MAT_IGNORE_ZERO_ENTRIES:
799     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
800     break;
801   default:
802     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
803   }
804   PetscFunctionReturn(0);
805 }
806 
/*
   Scales the matrix as A = diag(ll) * A * diag(rr); either vector may be NULL.
   ll conforms to the row layout, so its local entries scale local rows directly.
   rr conforms to the column layout, so its entries are first gathered into the
   local work vector mdn->lvec via the Mvctx star forest before scaling columns
   of the local block (which holds all columns of this process's rows).
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      /* walk across row i: consecutive column entries are lda apart */
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    /* build the column-gather star forest on first use */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    /* gather the distributed right-scaling entries into the local vector r */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      /* column i of the local block starts at offset i*lda and is contiguous */
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}
852 
853 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
854 {
855   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
856   PetscInt          i,j;
857   PetscMPIInt       size;
858   PetscReal         sum = 0.0;
859   const PetscScalar *av,*v;
860 
861   PetscFunctionBegin;
862   PetscCall(MatDenseGetArrayRead(mdn->A,&av));
863   v    = av;
864   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
865   if (size == 1) {
866     PetscCall(MatNorm(mdn->A,type,nrm));
867   } else {
868     if (type == NORM_FROBENIUS) {
869       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
870         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
871       }
872       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
873       *nrm = PetscSqrtReal(*nrm);
874       PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
875     } else if (type == NORM_1) {
876       PetscReal *tmp,*tmp2;
877       PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
878       *nrm = 0.0;
879       v    = av;
880       for (j=0; j<mdn->A->cmap->n; j++) {
881         for (i=0; i<mdn->A->rmap->n; i++) {
882           tmp[j] += PetscAbsScalar(*v);  v++;
883         }
884       }
885       PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
886       for (j=0; j<A->cmap->N; j++) {
887         if (tmp2[j] > *nrm) *nrm = tmp2[j];
888       }
889       PetscCall(PetscFree2(tmp,tmp2));
890       PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
891     } else if (type == NORM_INFINITY) { /* max row norm */
892       PetscReal ntemp;
893       PetscCall(MatNorm(mdn->A,type,&ntemp));
894       PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
895     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
896   }
897   PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
898   PetscFunctionReturn(0);
899 }
900 
/*
   Computes B = A^T for a parallel dense matrix.  Each process inserts its
   local rows as columns of the result through MatSetValues with global
   indices, so the matrix stash handles entries destined for other ranks.
   For MAT_INPLACE_MATRIX the freshly built transpose replaces A via
   MatHeaderMerge.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* the result has the transposed layout: local sizes swapped */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  /* global row indices of this process's rows become column indices of B */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the local block (contiguous, length m) becomes row j of B */
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: B takes over A's header so callers keep using A */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
937 
938 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
939 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
940 
941 PetscErrorCode MatSetUp_MPIDense(Mat A)
942 {
943   PetscFunctionBegin;
944   PetscCall(PetscLayoutSetUp(A->rmap));
945   PetscCall(PetscLayoutSetUp(A->cmap));
946   if (!A->preallocated) {
947     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
948   }
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
953 {
954   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
955 
956   PetscFunctionBegin;
957   PetscCall(MatAXPY(A->A,alpha,B->A,str));
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatConjugate_MPIDense(Mat mat)
962 {
963   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
964 
965   PetscFunctionBegin;
966   PetscCall(MatConjugate(a->A));
967   PetscFunctionReturn(0);
968 }
969 
970 PetscErrorCode MatRealPart_MPIDense(Mat A)
971 {
972   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
973 
974   PetscFunctionBegin;
975   PetscCall(MatRealPart(a->A));
976   PetscFunctionReturn(0);
977 }
978 
979 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
980 {
981   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
982 
983   PetscFunctionBegin;
984   PetscCall(MatImaginaryPart(a->A));
985   PetscFunctionReturn(0);
986 }
987 
988 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
989 {
990   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
991 
992   PetscFunctionBegin;
993   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
994   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
995   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
996   PetscFunctionReturn(0);
997 }
998 
999 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1000 
1001 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
1002 {
1003   PetscInt       i,m,n;
1004   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1005   PetscReal      *work;
1006 
1007   PetscFunctionBegin;
1008   PetscCall(MatGetSize(A,&m,&n));
1009   PetscCall(PetscMalloc1(n,&work));
1010   if (type == REDUCTION_MEAN_REALPART) {
1011     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
1012   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1013     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
1014   } else {
1015     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
1016   }
1017   if (type == NORM_2) {
1018     for (i=0; i<n; i++) work[i] *= work[i];
1019   }
1020   if (type == NORM_INFINITY) {
1021     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1022   } else {
1023     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1024   }
1025   PetscCall(PetscFree(work));
1026   if (type == NORM_2) {
1027     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1028   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1029     for (i=0; i<n; i++) reductions[i] /= m;
1030   }
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #if defined(PETSC_HAVE_CUDA)
/*
   Exposes column 'col' as an MPI CUDA vector aliasing the device storage of
   the local block.  Only one column (or submatrix) may be checked out at a
   time; the caller must pair this with MatDenseRestoreColumnVec().
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector with no backing array; one is placed below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  /* point the wrapper at column 'col' of the device array (columns are lda apart) */
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1054 
/*
   Returns a column vector obtained with MatDenseGetColumnVec_MPIDenseCUDA:
   releases the device array and detaches it from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL; /* invalidate the caller's handle */
  PetscFunctionReturn(0);
}
1068 
/*
   Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA: uses the read-only
   device array accessor and read-locks the returned vector.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector with no backing array; one is placed below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  /* forbid writes through the returned vector */
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1089 
/*
   Returns a column vector obtained with MatDenseGetColumnVecRead_MPIDenseCUDA:
   releases the read-only device array, pops the read lock, and detaches the
   array from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL; /* invalidate the caller's handle */
  PetscFunctionReturn(0);
}
1104 
/*
   Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA: uses the
   write-only device array accessor (previous contents need not be valid).
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector with no backing array; one is placed below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1124 
/*
   Returns a column vector obtained with MatDenseGetColumnVecWrite_MPIDenseCUDA:
   releases the write-only device array and detaches it from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL; /* invalidate the caller's handle */
  PetscFunctionReturn(0);
}
1138 
1139 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1140 {
1141   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1142 
1143   PetscFunctionBegin;
1144   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1145   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1146   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1151 {
1152   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1153 
1154   PetscFunctionBegin;
1155   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1156   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1157   PetscCall(MatDenseCUDAResetArray(l->A));
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1162 {
1163   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1167   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1168   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1173 {
1174   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1182 {
1183   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1184 
1185   PetscFunctionBegin;
1186   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1191 {
1192   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1200 {
1201   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1202 
1203   PetscFunctionBegin;
1204   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1209 {
1210   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1211 
1212   PetscFunctionBegin;
1213   PetscCall(MatDenseCUDAGetArray(l->A,a));
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1218 {
1219   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1220 
1221   PetscFunctionBegin;
1222   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1227 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1228 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1229 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1230 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1231 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1232 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1233 
/*
   Binds (or unbinds) the matrix to the CPU.  Propagates the binding to the
   local block and to the cached column-submatrix, destroys cached helper
   objects whose type no longer matches the execution side so they are
   recreated on demand, and swaps the composed column-vector accessors
   between the host (MPIDense) and device (MPIDenseCUDA) implementations.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* discard cached helpers that are not CUDA types; they will be recreated */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    /* install the device-side column-vector accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
  } else {
    /* install the host-side column-vector accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}
1275 
/*
   Allocates (or adopts) the device storage for a MATMPIDENSECUDA matrix.
   d_data, if non-NULL, is used as the device array of the local block;
   otherwise storage is allocated.  A no-op for non-CUDA matrix types.
   NOTE(review): unlike MatMPIDenseSetPreallocation_MPIDense this does not
   check matinuse first — confirm whether that guard is needed here too.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* the local block holds this process's rows and every global column */
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1298 #endif
1299 
1300 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1301 {
1302   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1303 
1304   PetscFunctionBegin;
1305   PetscCall(MatSetRandom(d->A,rctx));
1306   PetscFunctionReturn(0);
1307 }
1308 
/* A dense matrix stores every entry explicitly, so no diagonal entry can be
   missing; *missing is always PETSC_FALSE and d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1315 
1316 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1317 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1318 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1319 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1320 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1321 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1322 
1323 /* -------------------------------------------------------------------*/
/*
   Operation dispatch table for MATMPIDENSE.  The numeric markers give the
   operation index within struct _MatOps; NULL slots are either unsupported
   for this type or handled by generic fallbacks in the Mat interface layer.
*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_MPIDense,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};
1473 
/* Type-specific implementation of MatMPIDenseSetPreallocation(): allocates (or
   reuses) the inner sequential dense matrix that holds this process's rows.
   data == NULL lets PETSc allocate the storage; otherwise the caller-supplied
   column-major array of size rmap->n x cmap->N is used directly. */
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* Each process stores its rmap->n rows of all cmap->N columns in one sequential dense matrix */
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  /* The inner matrix type mirrors the outer type (CUDA or host) */
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1495 
/* Converts an MPIAIJ matrix to MPIDENSE: each process densifies its local rows
   into a sequential dense matrix, and the pieces are then concatenated into the
   parallel result. Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and
   MAT_INPLACE_MATRIX semantics. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* Replace A's internals with the dense result, preserving the user's handle */
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1514 
/* Converts an MPIDENSE matrix to MPIAIJ: the local sequential dense block is
   sparsified, then the sequential pieces are concatenated into the parallel
   result. Mirrors MatConvert_MPIAIJ_MPIDense() for the reuse modes. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A,&C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* Replace A's internals with the AIJ result, preserving the user's handle */
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1532 
1533 #if defined(PETSC_HAVE_ELEMENTAL)
/* Converts an MPIDENSE matrix to Elemental format by inserting each locally
   owned row into a new (or reused) MATELEMENTAL matrix via MatSetValues(). */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    /* Reused target must be cleared since values are accumulated with ADD_VALUES below */
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* Dense storage is column oriented; tell Elemental so the array is read correctly */
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  /* Index sets for this process's rows and all global columns */
  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1571 #endif
1572 
1573 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1574 {
1575   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1576 
1577   PetscFunctionBegin;
1578   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1583 {
1584   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1585 
1586   PetscFunctionBegin;
1587   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1592 {
1593   Mat_MPIDense   *mat;
1594   PetscInt       m,nloc,N;
1595 
1596   PetscFunctionBegin;
1597   PetscCall(MatGetSize(inmat,&m,&N));
1598   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1599   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1600     PetscInt sum;
1601 
1602     if (n == PETSC_DECIDE) {
1603       PetscCall(PetscSplitOwnership(comm,&n,&N));
1604     }
1605     /* Check sum(n) = N */
1606     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1607     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1608 
1609     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1610     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1611   }
1612 
1613   /* numeric phase */
1614   mat = (Mat_MPIDense*)(*outmat)->data;
1615   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 #if defined(PETSC_HAVE_CUDA)
/* Converts an MPIDENSECUDA matrix back to plain (host) MPIDENSE: binds the
   matrix to the CPU, restores the host vector type, removes the CUDA-only
   composed methods, and converts the inner sequential matrix in place. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  /* For MAT_INPLACE_MATRIX, *newmat is M itself and is mutated below */
  B    = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  /* Composing NULL removes the CUDA-specific method registrations */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  /* Convert the inner sequential matrix to the host type as well */
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1659 
/* Converts a host MPIDENSE matrix to MPIDENSECUDA: switches the default vector
   type to CUDA, registers the CUDA-specific composed methods, and converts the
   inner sequential matrix in place to MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  /* For MAT_INPLACE_MATRIX, *newmat is M itself and is mutated below */
  B    = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  /* Register the CUDA-specific implementations by composed-function name */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  /* Convert the inner sequential matrix; if it does not exist yet, allocation is deferred */
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1702 #endif
1703 
/* Gives read/write access to column col of A as a parallel Vec. The cached
   work vector a->cvec borrows (places) the column's storage from the inner
   sequential matrix; it must be returned with MatDenseRestoreColumnVec(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* Lazily create the work vector; its array is placed just below */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1722 
/* Returns the column vector handed out by MatDenseGetColumnVec_MPIDense(),
   restoring the inner array and detaching it from the work vector. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}
1736 
/* Gives read-only access to column col of A as a parallel Vec; the vector is
   additionally read-locked. Must be matched by MatDenseRestoreColumnVecRead(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* Lazily create the work vector; its array is placed just below */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1756 
1757 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1758 {
1759   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1760 
1761   PetscFunctionBegin;
1762   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1763   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1764   a->vecinuse = 0;
1765   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1766   PetscCall(VecLockReadPop(a->cvec));
1767   PetscCall(VecResetArray(a->cvec));
1768   *v   = NULL;
1769   PetscFunctionReturn(0);
1770 }
1771 
/* Gives write-only access to column col of A as a parallel Vec (existing
   values need not be valid). Must be matched by MatDenseRestoreColumnVecWrite(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* Lazily create the work vector; its array is placed just below */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1; /* 0 means "not in use", hence the +1 */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1790 
1791 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1792 {
1793   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1794 
1795   PetscFunctionBegin;
1796   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1797   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1798   a->vecinuse = 0;
1799   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1800   PetscCall(VecResetArray(a->cvec));
1801   *v   = NULL;
1802   PetscFunctionReturn(0);
1803 }
1804 
/* Returns a parallel dense matrix viewing columns [cbegin,cend) of A. The
   wrapper matrix a->cmat is created lazily and cached; its column layout is
   rebuilt when the requested width changes. Only one submatrix may be
   outstanding; must be matched by MatDenseRestoreSubMatrix(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm     comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* Lazily create the wrapper: same row layout as A, cend-cbegin columns */
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* Cached wrapper has a different width: rebuild only its column layout */
    PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
    PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* 0 means "not in use", hence the +1 */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1837 
/* Returns the submatrix handed out by MatDenseGetSubMatrix_MPIDense(),
   releasing the inner sequential view so another submatrix can be requested. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
  *v = NULL;
  PetscFunctionReturn(0);
}
1853 
1854 /*MC
1855    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1856 
1857    Options Database Keys:
1858 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1859 
1860   Level: beginner
1861 
1862 .seealso: `MatCreateDense()`
1863 
1864 M*/
/* Constructor for MATMPIDENSE: installs the function table, creates the value
   stash used for off-process MatSetValues(), and registers the dense-specific
   operations and conversions by composed-function name. The inner sequential
   matrix is allocated later, by MatMPIDenseSetPreallocation_MPIDense(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* register dense-specific methods so MatDenseXXX() dispatch finds them */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other formats (availability depends on configuration) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}
1929 
1930 /*MC
1931    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1932 
1933    Options Database Keys:
1934 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1935 
1936   Level: beginner
1937 
.seealso: `MATMPIDENSE`, `MATDENSECUDA`, `MATSEQDENSECUDA`
1939 
1940 M*/
1941 #if defined(PETSC_HAVE_CUDA)
1942 #include <petsc/private/deviceimpl.h>
/* Constructor for MATMPIDENSECUDA: ensure the CUDA device is initialized,
   build a host MPIDENSE matrix, then convert it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
1951 #endif
1952 
1953 /*MC
1954    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1955 
1956    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1957    and MATMPIDENSE otherwise.
1958 
1959    Options Database Keys:
1960 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1961 
1962   Level: beginner
1963 
.seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1965 M*/
1966 
1967 /*MC
1968    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1969 
1970    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1971    and MATMPIDENSECUDA otherwise.
1972 
1973    Options Database Keys:
1974 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1975 
1976   Level: beginner
1977 
.seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1979 M*/
1980 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation registered under this name */
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}
2010 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the placed host array is now the valid copy */
#endif
  PetscFunctionReturn(0);
}
2042 
/*@
   MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2067 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the replacing host array is now the valid copy */
#endif
  PetscFunctionReturn(0);
}
2098 
2099 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the placed device array is now the valid copy */
  PetscFunctionReturn(0);
}
2128 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that which it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2153 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* unlike PlaceArray, ownership of the buffer transfers to the matrix (see Notes) */
  PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  /* values changed: bump the object state and mark the GPU copy as the valid one */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2183 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* write-only access: no CPU-to-GPU copy is performed by the implementation */
  PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2210 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the GPU copy of the matrix is considered the up-to-date one.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
  /* the caller may have written new values: bump the state, GPU copy is now current */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2235 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  /* read-only access: intentionally no PetscObjectStateIncrease() here */
  PetscFunctionReturn(0);
}
2261 
2262 /*@C
2263    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2264 
2265    Not Collective
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 -  array - the GPU array in column major order
2270 
2271    Notes:
2272    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2273 
2274    Level: developer
2275 
2276 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2277 @*/
2278 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2279 {
2280   PetscFunctionBegin;
2281   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2282   PetscFunctionReturn(0);
2283 }
2284 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-write access: the implementation may copy CPU data to the GPU first */
  PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2311 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the GPU copy of the matrix is considered the up-to-date one.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
  /* the caller may have written new values: bump the state, GPU copy is now current */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2336 #endif
2337 
/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  /* MATDENSE resolves to MATSEQDENSE or MATMPIDENSE depending on the communicator size */
  PetscCall(MatSetType(*A,MATDENSE));
  /* both preallocation routines are called; only the one matching the actual type takes effect */
  PetscCall(MatSeqDenseSetPreallocation(*A,data));
  PetscCall(MatMPIDenseSetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
2382 
2383 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   Either all ranks must pass a non-NULL data pointer, or none may (checked collectively below).

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  /* collectively verify that either all ranks or no ranks supply user data */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  PetscCall(MatSetSizes(*A,m,n,M,N));
  /* MATDENSECUDA resolves to the sequential or MPI variant based on the communicator size */
  PetscCall(MatSetType(*A,MATDENSECUDA));
  /* both preallocation routines are called; only the one matching the actual type takes effect */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
2418 #endif
2419 
/* Duplicate an MPI dense matrix: create a new matrix with the same layouts and type,
   then duplicate the local sequential block (values copied only per cpvalues). */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
  PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
  a       = (Mat_MPIDense*)mat->data;

  /* mirror the assembly/factorization status of the source */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts by reference instead of rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));

  /* duplicate the inner sequential matrix; cpvalues controls value copying */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}
2448 
/* Load an MPI dense matrix from a viewer. Binary viewers are always supported;
   HDF5 viewers only when PETSc was configured with HDF5. Any other viewer errors. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2474 
2475 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2476 {
2477   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2478   Mat            a,b;
2479 
2480   PetscFunctionBegin;
2481   a    = matA->A;
2482   b    = matB->A;
2483   PetscCall(MatEqual(a,b,flag));
2484   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2489 {
2490   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2491 
2492   PetscFunctionBegin;
2493   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2494   PetscCall(MatDestroy(&atb->atb));
2495   PetscCall(PetscFree(atb));
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2500 {
2501   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2502 
2503   PetscFunctionBegin;
2504   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2505   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2506   PetscCall(PetscFree(abt));
2507   PetscFunctionReturn(0);
2508 }
2509 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank forms the full-sized local product A_loc^T * B_loc, packs it by destination
   rank, and an MPI_Reduce_scatter sums the contributions into each rank's rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf: contiguous per-destination segments, each segment
     holding that rank's rows of every column (column-major within the segment) */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2554 
2555 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2556 {
2557   MPI_Comm              comm;
2558   PetscMPIInt           size;
2559   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2560   Mat_TransMatMultDense *atb;
2561   PetscBool             cisdense;
2562   PetscInt              i;
2563   const PetscInt        *ranges;
2564 
2565   PetscFunctionBegin;
2566   MatCheckProduct(C,3);
2567   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2568   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2569   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2570     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2571   }
2572 
2573   /* create matrix product C */
2574   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2575   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2576   if (!cisdense) {
2577     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2578   }
2579   PetscCall(MatSetUp(C));
2580 
2581   /* create data structure for reuse C */
2582   PetscCallMPI(MPI_Comm_size(comm,&size));
2583   PetscCall(PetscNew(&atb));
2584   cM   = C->rmap->N;
2585   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2586   PetscCall(MatGetOwnershipRanges(C,&ranges));
2587   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2588 
2589   C->product->data    = atb;
2590   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2591   PetscFunctionReturn(0);
2592 }
2593 
/* Symbolic phase of C = A * B^T for MPI dense matrices: set up C and allocate the
   communication buffers for the chosen algorithm ("allgatherv" or "cyclic").
   Note: fill is unused; dense products need no fill estimate. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", anything else = "cyclic" */
  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* a dedicated tag keeps the cyclic point-to-point traffic of this product separate */
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest local block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" — local B^T pack buffer plus full gathered B^T, with counts/displacements */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2645 
/* Numeric phase of C = A * B^T using the "cyclic" algorithm: the local block of B
   (stored transposed in a contiguous buffer) is passed around the ring of ranks;
   at each step a GEMM accumulates the corresponding column block of C while the
   next block is in flight (communication/computation overlap). */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn   = B->rmap->n;
  /* if B's local block is contiguous (lda == local rows) it can be sent in place,
     otherwise compact it into buf[0] first */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2730 
2731 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2732 {
2733   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2734   Mat_MatTransMultDense *abt;
2735   MPI_Comm              comm;
2736   PetscMPIInt           size;
2737   PetscScalar           *cv, *sendbuf, *recvbuf;
2738   const PetscScalar     *av,*bv;
2739   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2740   PetscScalar           _DOne=1.0,_DZero=0.0;
2741   PetscBLASInt          cm, cn, ck, alda, clda;
2742 
2743   PetscFunctionBegin;
2744   MatCheckProduct(C,3);
2745   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2746   abt  = (Mat_MatTransMultDense*)C->product->data;
2747   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2748   PetscCallMPI(MPI_Comm_size(comm,&size));
2749   PetscCall(MatDenseGetArrayRead(a->A,&av));
2750   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2751   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2752   PetscCall(MatDenseGetLDA(a->A,&i));
2753   PetscCall(PetscBLASIntCast(i,&alda));
2754   PetscCall(MatDenseGetLDA(b->A,&blda));
2755   PetscCall(MatDenseGetLDA(c->A,&i));
2756   PetscCall(PetscBLASIntCast(i,&clda));
2757   /* copy transpose of B into buf[0] */
2758   bn      = B->rmap->n;
2759   sendbuf = abt->buf[0];
2760   recvbuf = abt->buf[1];
2761   for (k = 0, j = 0; j < bn; j++) {
2762     for (i = 0; i < cK; i++, k++) {
2763       sendbuf[k] = bv[i * blda + j];
2764     }
2765   }
2766   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2767   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2768   PetscCall(PetscBLASIntCast(cK,&ck));
2769   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2770   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2771   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2772   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2773   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2774   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2775   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2776   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2781 {
2782   Mat_MatTransMultDense *abt;
2783 
2784   PetscFunctionBegin;
2785   MatCheckProduct(C,3);
2786   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2787   abt = (Mat_MatTransMultDense*)C->product->data;
2788   switch (abt->alg) {
2789   case 1:
2790     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2791     break;
2792   default:
2793     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2794     break;
2795   }
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2800 {
2801   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2802 
2803   PetscFunctionBegin;
2804   PetscCall(MatDestroy(&ab->Ce));
2805   PetscCall(MatDestroy(&ab->Ae));
2806   PetscCall(MatDestroy(&ab->Be));
2807   PetscCall(PetscFree(ab));
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B via Elemental: refresh the cached Elemental mirrors of
   A and B, multiply them with Elemental, then convert the result back into C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* reuse the Elemental mirrors created in the symbolic phase */
  PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
  /* copy the Elemental result back into the dense C */
  PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
  PetscFunctionReturn(0);
}
2826 
2827 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2828 {
2829   Mat              Ae,Be,Ce;
2830   Mat_MatMultDense *ab;
2831 
2832   PetscFunctionBegin;
2833   MatCheckProduct(C,4);
2834   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2835   /* check local size of A and B */
2836   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2837     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2838   }
2839 
2840   /* create elemental matrices Ae and Be */
2841   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2842   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2843   PetscCall(MatSetType(Ae,MATELEMENTAL));
2844   PetscCall(MatSetUp(Ae));
2845   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2846 
2847   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2848   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2849   PetscCall(MatSetType(Be,MATELEMENTAL));
2850   PetscCall(MatSetUp(Be));
2851   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2852 
2853   /* compute symbolic Ce = Ae*Be */
2854   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2855   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2856 
2857   /* setup C */
2858   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2859   PetscCall(MatSetType(C,MATDENSE));
2860   PetscCall(MatSetUp(C));
2861 
2862   /* create data structure for reuse Cdense */
2863   PetscCall(PetscNew(&ab));
2864   ab->Ae = Ae;
2865   ab->Be = Be;
2866   ab->Ce = Ce;
2867 
2868   C->product->data    = ab;
2869   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2870   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2871   PetscFunctionReturn(0);
2872 }
2873 #endif
2874 /* ----------------------------------------------- */
2875 #if defined(PETSC_HAVE_ELEMENTAL)
/* Configure C for the A*B product: install the Elemental-backed symbolic routine. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
2884 
2885 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2886 {
2887   Mat_Product *product = C->product;
2888   Mat         A = product->A,B=product->B;
2889 
2890   PetscFunctionBegin;
2891   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2892     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2893   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2894   C->ops->productsymbolic = MatProductSymbolic_AtB;
2895   PetscFunctionReturn(0);
2896 }
2897 
/* Configure C for the A*B^T product: pick the algorithm ("allgatherv" by default,
   "cyclic" optionally) from the options database and install the symbolic routines. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg,"default",&flg));
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  /* Get runtime option; the option name depends on which API created the product */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  }
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2931 
2932 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2933 {
2934   Mat_Product    *product = C->product;
2935 
2936   PetscFunctionBegin;
2937   switch (product->type) {
2938 #if defined(PETSC_HAVE_ELEMENTAL)
2939   case MATPRODUCT_AB:
2940     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2941     break;
2942 #endif
2943   case MATPRODUCT_AtB:
2944     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2945     break;
2946   case MATPRODUCT_ABt:
2947     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2948     break;
2949   default:
2950     break;
2951   }
2952   PetscFunctionReturn(0);
2953 }
2954