xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 2291f7e77f703bb8b1bf66dd3675e73030a0050f)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

    Note: the returned matrix is returned without a new reference being taken;
          do not destroy it.

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  /* the cast of A->data above is only valid for MATMPIDENSE; mat->A is
     dereferenced only inside the flg branch, so this is safe */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
/* Add alpha to every diagonal entry of A whose row this rank owns ([rstart,rend)). */
PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
  PetscScalar    *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  /* for rectangular matrices the diagonal stops at column N; clamp the local row range */
  rend2 = PetscMin(rend,A->cmap->N);
  if (rend2>rstart) {
    /* the local block stores all N columns of the owned rows column-major with leading
       dimension lda, so global entry (j,j) sits at v[(j-rstart)+j*lda] */
    for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
    PetscCall(PetscLogFlops(rend2-rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A,&v));
  PetscFunctionReturn(0);
}
69 
70 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
71 {
72   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
73   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
74 
75   PetscFunctionBegin;
76   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
77   lrow = row - rstart;
78   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
83 {
84   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
85   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
86 
87   PetscFunctionBegin;
88   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
89   lrow = row - rstart;
90   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
91   PetscFunctionReturn(0);
92 }
93 
/*
   Return in *a the square diagonal block of A owned by this rank.
   The block is created on first use, cached on A as the composed object
   "DiagonalBlock", and handed back (no new reference) on later calls.
*/
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    /* B aliases A's storage starting at local column rstart.
       NOTE(review): the offset array+m*rstart assumes the local block's leading
       dimension equals m — confirm this holds when a nondefault lda is in use */
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    /* compose-then-destroy leaves the cache holding the only reference to B */
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
124 
/*
   Insert or add values into a MATMPIDENSE matrix.  Locally owned rows go
   directly into the sequential block; off-process rows are stashed for
   MatAssemblyBegin/End (unless stashing is disabled via donotstash).
*/
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative indices are silently skipped by convention */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input occupies v[i*n .. i*n+n-1] */
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        /* v is column-major: entry (i,j) of the input is v[i+j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      /* row belongs to another rank: queue it and mark the matrix unassembled */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
157 
/*
   Fetch the m-by-n block of entries (global indices idxm x idxn) into v, stored
   row-major.  Only locally owned rows may be requested; errors otherwise.
*/
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        /* output entry (i,j) goes to v[i*n+j] (row-major) */
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
178 
179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
180 {
181   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
182 
183   PetscFunctionBegin;
184   PetscCall(MatDenseGetLDA(a->A,lda));
185   PetscFunctionReturn(0);
186 }
187 
/*
   Set the leading dimension of the local block.  May be called before
   preallocation, in which case the local block is created here first.
*/
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    /* the local block holds all N columns of this rank's rows */
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    /* match the device flavor of the parallel matrix */
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}
207 
208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
209 {
210   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
211 
212   PetscFunctionBegin;
213   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
214   PetscCall(MatDenseGetArray(a->A,array));
215   PetscFunctionReturn(0);
216 }
217 
218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
219 {
220   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
221 
222   PetscFunctionBegin;
223   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
224   PetscCall(MatDenseGetArrayRead(a->A,array));
225   PetscFunctionReturn(0);
226 }
227 
228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
229 {
230   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
231 
232   PetscFunctionBegin;
233   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
234   PetscCall(MatDenseGetArrayWrite(a->A,array));
235   PetscFunctionReturn(0);
236 }
237 
238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
239 {
240   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
241 
242   PetscFunctionBegin;
243   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
244   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
245   PetscCall(MatDensePlaceArray(a->A,array));
246   PetscFunctionReturn(0);
247 }
248 
249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
250 {
251   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
252 
253   PetscFunctionBegin;
254   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
255   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
256   PetscCall(MatDenseResetArray(a->A));
257   PetscFunctionReturn(0);
258 }
259 
260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
261 {
262   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
263 
264   PetscFunctionBegin;
265   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
266   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
267   PetscCall(MatDenseReplaceArray(a->A,array));
268   PetscFunctionReturn(0);
269 }
270 
/*
   Extract the submatrix A(isrow,iscol) into *B.  The column index set is
   gathered onto every rank (so each rank copies all selected columns of its
   selected rows); row indices must already follow the original row partition.
*/
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced contiguously, which assumes the destination's
     local leading dimension equals nrows — confirm for MAT_REUSE_MATRIX inputs */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    /* av points at the start of (global) source column icol[i] in local storage */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}
339 
340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343 
344   PetscFunctionBegin;
345   PetscCall(MatDenseRestoreArray(a->A,array));
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
350 {
351   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
352 
353   PetscFunctionBegin;
354   PetscCall(MatDenseRestoreArrayRead(a->A,array));
355   PetscFunctionReturn(0);
356 }
357 
358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
359 {
360   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
361 
362   PetscFunctionBegin;
363   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
364   PetscFunctionReturn(0);
365 }
366 
/* Begin assembly: start shipping stashed off-process entries toward their owning ranks. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* nothing was stashed, so there is no communication to start */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}
380 
/*
   Finish assembly: drain incoming stash messages, apply the received values
   (grouped by row for fewer MatSetValues calls), then assemble the local block.
*/
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}
414 
415 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
416 {
417   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
418 
419   PetscFunctionBegin;
420   PetscCall(MatZeroEntries(l->A));
421   PetscFunctionReturn(0);
422 }
423 
/*
   Zero the locally owned rows listed (global indices) in rows[], optionally
   setting diag on the diagonal and fixing b so that b = diag*x on those rows.
*/
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows with a zero diagonal; the diagonal is inserted separately below */
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): MatDiagonalSet with INSERT_VALUES writes the WHOLE diagonal,
       not only the zeroed rows — confirm callers rely on this behavior */
    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
455 
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460 
/* yy = mat*xx: communicate the needed entries of xx into the local work vector, then local mult. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* lazily build the star forest that scatters xx into lvec
     (presumably all of xx is needed on each rank; see MatSetUpMultiply_MPIDense) */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  /* memtype-aware bcast handles both host and device arrays */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}
479 
/* zz = mat*xx + yy: same communication pattern as MatMult_MPIDense, then local multadd. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* lazily build the scatter of xx into the local work vector */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  /* memtype-aware bcast handles both host and device arrays */
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}
498 
/* yy = A^T*xx: local transpose-multiply into the work vector, then sum-reduce the partials into yy. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* yy accumulates contributions from all ranks, so it must start at zero */
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  /* reverse of the MatMult scatter: add each rank's partial result into yy */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}
518 
/* zz = A^T*xx + yy: seed zz with yy, then sum-reduce each rank's local transpose product into zz. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* start the accumulation from yy */
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  /* reverse of the MatMult scatter: add each rank's partial result into zz */
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}
538 
539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
542   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
543   PetscScalar       *x,zero = 0.0;
544   const PetscScalar *av;
545 
546   PetscFunctionBegin;
547   PetscCall(VecSet(v,zero));
548   PetscCall(VecGetArray(v,&x));
549   PetscCall(VecGetSize(v,&n));
550   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
551   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
552   radd = A->rmap->rstart*m;
553   PetscCall(MatDenseGetArrayRead(a->A,&av));
554   PetscCall(MatDenseGetLDA(a->A,&lda));
555   for (i=0; i<len; i++) {
556     x[i] = av[radd + i*lda + i];
557   }
558   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
559   PetscCall(VecRestoreArray(v,&x));
560   PetscFunctionReturn(0);
561 }
562 
/*
   Destroy a MATMPIDENSE matrix: release the stash, the inner objects, the data
   struct, and unregister every composed method and cached object.
*/
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying while an access object is checked out indicates a caller bug */
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* inner objects: local block, comm work vector, SF, cached column vec/submatrix */
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  /* unregister every composed method installed at creation/convert time */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined (PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));

  /* drop the cached diagonal block created by MatGetDiagonalBlock_MPIDense() */
  PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL));
  PetscFunctionReturn(0);
}
623 
624 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
625 
626 #include <petscdraw.h>
627 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
628 {
629   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
630   PetscMPIInt       rank;
631   PetscViewerType   vtype;
632   PetscBool         iascii,isdraw;
633   PetscViewer       sviewer;
634   PetscViewerFormat format;
635 
636   PetscFunctionBegin;
637   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
638   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
639   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
640   if (iascii) {
641     PetscCall(PetscViewerGetType(viewer,&vtype));
642     PetscCall(PetscViewerGetFormat(viewer,&format));
643     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
644       MatInfo info;
645       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
646       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
647       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
648       PetscCall(PetscViewerFlush(viewer));
649       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
650       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
651       PetscFunctionReturn(0);
652     } else if (format == PETSC_VIEWER_ASCII_INFO) {
653       PetscFunctionReturn(0);
654     }
655   } else if (isdraw) {
656     PetscDraw draw;
657     PetscBool isnull;
658 
659     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
660     PetscCall(PetscDrawIsNull(draw,&isnull));
661     if (isnull) PetscFunctionReturn(0);
662   }
663 
664   {
665     /* assemble the entire matrix onto first processor. */
666     Mat         A;
667     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
668     PetscInt    *cols;
669     PetscScalar *vals;
670 
671     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
672     if (rank == 0) {
673       PetscCall(MatSetSizes(A,M,N,M,N));
674     } else {
675       PetscCall(MatSetSizes(A,0,0,M,N));
676     }
677     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
678     PetscCall(MatSetType(A,MATMPIDENSE));
679     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
680     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
681 
682     /* Copy the matrix ... This isn't the most efficient means,
683        but it's quick for now */
684     A->insertmode = INSERT_VALUES;
685 
686     row = mat->rmap->rstart;
687     m   = mdn->A->rmap->n;
688     for (i=0; i<m; i++) {
689       PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
690       PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
691       PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
692       row++;
693     }
694 
695     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
696     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
697     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
698     if (rank == 0) {
699       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
700       PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
701     }
702     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
703     PetscCall(PetscViewerFlush(viewer));
704     PetscCall(MatDestroy(&A));
705   }
706   PetscFunctionReturn(0);
707 }
708 
709 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
710 {
711   PetscBool      iascii,isbinary,isdraw,issocket;
712 
713   PetscFunctionBegin;
714   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
715   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
716   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
717   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
718 
719   if (iascii || issocket || isdraw) {
720     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
721   } else if (isbinary) {
722     PetscCall(MatView_Dense_Binary(mat,viewer));
723   }
724   PetscFunctionReturn(0);
725 }
726 
/*
   Gather matrix statistics.  Local numbers come from the sequential block;
   the global variants reduce them (max or sum) over the communicator.
*/
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  /* pack the five reducible fields for a single allreduce */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  /* no parallel LU/ILU/Cholesky for this type */
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
768 
769 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
770 {
771   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
772 
773   PetscFunctionBegin;
774   switch (op) {
775   case MAT_NEW_NONZERO_LOCATIONS:
776   case MAT_NEW_NONZERO_LOCATION_ERR:
777   case MAT_NEW_NONZERO_ALLOCATION_ERR:
778     MatCheckPreallocated(A,1);
779     PetscCall(MatSetOption(a->A,op,flg));
780     break;
781   case MAT_ROW_ORIENTED:
782     MatCheckPreallocated(A,1);
783     a->roworiented = flg;
784     PetscCall(MatSetOption(a->A,op,flg));
785     break;
786   case MAT_FORCE_DIAGONAL_ENTRIES:
787   case MAT_KEEP_NONZERO_PATTERN:
788   case MAT_USE_HASH_TABLE:
789   case MAT_SORTED_FULL:
790     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
791     break;
792   case MAT_IGNORE_OFF_PROC_ENTRIES:
793     a->donotstash = flg;
794     break;
795   case MAT_SYMMETRIC:
796   case MAT_STRUCTURALLY_SYMMETRIC:
797   case MAT_HERMITIAN:
798   case MAT_SYMMETRY_ETERNAL:
799   case MAT_IGNORE_LOWER_TRIANGULAR:
800   case MAT_IGNORE_ZERO_ENTRIES:
801     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
802     break;
803   default:
804     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
805   }
806   PetscFunctionReturn(0);
807 }
808 
/*
   A <- diag(ll) * A * diag(rr), scaling the local dense block in place.
   Either vector may be NULL to skip that side.

   Left scaling uses only locally owned rows. Right scaling needs the full
   set of column entries on every process, so the (global-length) rr values
   are broadcast into mdn->lvec through the matrix's PetscSF before the
   columns are scaled.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    /* row i is strided through memory with step lda (column-major storage) */
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    /* lazily build the SF that gathers all rr entries onto each process */
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    /* column i occupies a contiguous run of m entries starting at i*lda */
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}
854 
855 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
856 {
857   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
858   PetscInt          i,j;
859   PetscMPIInt       size;
860   PetscReal         sum = 0.0;
861   const PetscScalar *av,*v;
862 
863   PetscFunctionBegin;
864   PetscCall(MatDenseGetArrayRead(mdn->A,&av));
865   v    = av;
866   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
867   if (size == 1) {
868     PetscCall(MatNorm(mdn->A,type,nrm));
869   } else {
870     if (type == NORM_FROBENIUS) {
871       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
872         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
873       }
874       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
875       *nrm = PetscSqrtReal(*nrm);
876       PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
877     } else if (type == NORM_1) {
878       PetscReal *tmp,*tmp2;
879       PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
880       *nrm = 0.0;
881       v    = av;
882       for (j=0; j<mdn->A->cmap->n; j++) {
883         for (i=0; i<mdn->A->rmap->n; i++) {
884           tmp[j] += PetscAbsScalar(*v);  v++;
885         }
886       }
887       PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
888       for (j=0; j<A->cmap->N; j++) {
889         if (tmp2[j] > *nrm) *nrm = tmp2[j];
890       }
891       PetscCall(PetscFree2(tmp,tmp2));
892       PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
893     } else if (type == NORM_INFINITY) { /* max row norm */
894       PetscReal ntemp;
895       PetscCall(MatNorm(mdn->A,type,&ntemp));
896       PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
897     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
898   }
899   PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
900   PetscFunctionReturn(0);
901 }
902 
903 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
904 {
905   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
906   Mat            B;
907   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
908   PetscInt       j,i,lda;
909   PetscScalar    *v;
910 
911   PetscFunctionBegin;
912   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
913     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
914     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
915     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
916     PetscCall(MatMPIDenseSetPreallocation(B,NULL));
917   } else B = *matout;
918 
919   m    = a->A->rmap->n; n = a->A->cmap->n;
920   PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
921   PetscCall(MatDenseGetLDA(a->A,&lda));
922   PetscCall(PetscMalloc1(m,&rwork));
923   for (i=0; i<m; i++) rwork[i] = rstart + i;
924   for (j=0; j<n; j++) {
925     PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
926     v   += lda;
927   }
928   PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
929   PetscCall(PetscFree(rwork));
930   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
931   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
932   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
933     *matout = B;
934   } else {
935     PetscCall(MatHeaderMerge(A,&B));
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
941 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
942 
943 PetscErrorCode MatSetUp_MPIDense(Mat A)
944 {
945   PetscFunctionBegin;
946   PetscCall(PetscLayoutSetUp(A->rmap));
947   PetscCall(PetscLayoutSetUp(A->cmap));
948   if (!A->preallocated) {
949     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
950   }
951   PetscFunctionReturn(0);
952 }
953 
954 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
955 {
956   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
957 
958   PetscFunctionBegin;
959   PetscCall(MatAXPY(A->A,alpha,B->A,str));
960   PetscFunctionReturn(0);
961 }
962 
963 PetscErrorCode MatConjugate_MPIDense(Mat mat)
964 {
965   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
966 
967   PetscFunctionBegin;
968   PetscCall(MatConjugate(a->A));
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatRealPart_MPIDense(Mat A)
973 {
974   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
975 
976   PetscFunctionBegin;
977   PetscCall(MatRealPart(a->A));
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
982 {
983   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
984 
985   PetscFunctionBegin;
986   PetscCall(MatImaginaryPart(a->A));
987   PetscFunctionReturn(0);
988 }
989 
990 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
991 {
992   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
993 
994   PetscFunctionBegin;
995   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
996   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
997   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
998   PetscFunctionReturn(0);
999 }
1000 
1001 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
1002 
1003 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
1004 {
1005   PetscInt       i,m,n;
1006   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
1007   PetscReal      *work;
1008 
1009   PetscFunctionBegin;
1010   PetscCall(MatGetSize(A,&m,&n));
1011   PetscCall(PetscMalloc1(n,&work));
1012   if (type == REDUCTION_MEAN_REALPART) {
1013     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
1014   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
1015     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
1016   } else {
1017     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
1018   }
1019   if (type == NORM_2) {
1020     for (i=0; i<n; i++) work[i] *= work[i];
1021   }
1022   if (type == NORM_INFINITY) {
1023     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1024   } else {
1025     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1026   }
1027   PetscCall(PetscFree(work));
1028   if (type == NORM_2) {
1029     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1030   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1031     for (i=0; i<n; i++) reductions[i] /= m;
1032   }
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #if defined(PETSC_HAVE_CUDA)
1037 PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
1038 {
1039   PetscScalar *da;
1040   PetscInt    lda;
1041 
1042   PetscFunctionBegin;
1043   PetscCall(MatDenseCUDAGetArray(A,&da));
1044   PetscCall(MatDenseGetLDA(A,&lda));
1045   PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
1046   PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
1047   PetscCall(MatDenseCUDARestoreArray(A,&da));
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1052 {
1053   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1054   PetscInt       lda;
1055 
1056   PetscFunctionBegin;
1057   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1058   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1059   if (!a->cvec) {
1060     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1061     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1062   }
1063   a->vecinuse = col + 1;
1064   PetscCall(MatDenseGetLDA(a->A,&lda));
1065   PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1066   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1067   *v   = a->cvec;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1072 {
1073   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1077   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1078   a->vecinuse = 0;
1079   PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1080   PetscCall(VecCUDAResetArray(a->cvec));
1081   *v   = NULL;
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1086 {
1087   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1088   PetscInt       lda;
1089 
1090   PetscFunctionBegin;
1091   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1092   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1093   if (!a->cvec) {
1094     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1095     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1096   }
1097   a->vecinuse = col + 1;
1098   PetscCall(MatDenseGetLDA(a->A,&lda));
1099   PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
1100   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1101   PetscCall(VecLockReadPush(a->cvec));
1102   *v   = a->cvec;
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1107 {
1108   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1109 
1110   PetscFunctionBegin;
1111   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1112   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1113   a->vecinuse = 0;
1114   PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
1115   PetscCall(VecLockReadPop(a->cvec));
1116   PetscCall(VecCUDAResetArray(a->cvec));
1117   *v   = NULL;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1122 {
1123   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1124   PetscInt       lda;
1125 
1126   PetscFunctionBegin;
1127   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1128   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1129   if (!a->cvec) {
1130     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1131     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1132   }
1133   a->vecinuse = col + 1;
1134   PetscCall(MatDenseGetLDA(a->A,&lda));
1135   PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1136   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1137   *v   = a->cvec;
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1142 {
1143   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1144 
1145   PetscFunctionBegin;
1146   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1147   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1148   a->vecinuse = 0;
1149   PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1150   PetscCall(VecCUDAResetArray(a->cvec));
1151   *v   = NULL;
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1156 {
1157   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1161   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1162   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1163   PetscFunctionReturn(0);
1164 }
1165 
1166 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1167 {
1168   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1172   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1173   PetscCall(MatDenseCUDAResetArray(l->A));
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1178 {
1179   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1180 
1181   PetscFunctionBegin;
1182   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1183   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1184   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1189 {
1190   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1191 
1192   PetscFunctionBegin;
1193   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1198 {
1199   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1200 
1201   PetscFunctionBegin;
1202   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1207 {
1208   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1209 
1210   PetscFunctionBegin;
1211   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1216 {
1217   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1218 
1219   PetscFunctionBegin;
1220   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1225 {
1226   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1227 
1228   PetscFunctionBegin;
1229   PetscCall(MatDenseCUDAGetArray(l->A,a));
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1234 {
1235   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1236 
1237   PetscFunctionBegin;
1238   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1243 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1244 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1245 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1246 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1247 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1248 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1249 
/*
   Bind (bind=PETSC_TRUE) or unbind a MATMPIDENSECUDA matrix to the CPU:
   propagates the binding to the local block and to the cached column
   submatrix, drops cached helper objects of the wrong memory type, and
   swaps the column-vec accessors and the shift kernel between the CPU and
   CUDA implementations. Must not be called while a column vector or
   submatrix obtained from this matrix is outstanding.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* cached helpers created while bound to the CPU are host objects;
       destroy them so they are recreated with CUDA types on demand */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    /* register the device implementations of the column-vec accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift                   = MatShift_MPIDenseCUDA;
  } else {
    /* register the host implementations of the column-vec accessors */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift                   = MatShift_MPIDense;
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}
1293 
1294 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1295 {
1296   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1297   PetscBool      iscuda;
1298 
1299   PetscFunctionBegin;
1300   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1301   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
1302   if (!iscuda) PetscFunctionReturn(0);
1303   PetscCall(PetscLayoutSetUp(A->rmap));
1304   PetscCall(PetscLayoutSetUp(A->cmap));
1305   if (!d->A) {
1306     PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
1307     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
1308     PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
1309   }
1310   PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
1311   PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
1312   A->preallocated = PETSC_TRUE;
1313   A->assembled = PETSC_TRUE;
1314   PetscFunctionReturn(0);
1315 }
1316 #endif
1317 
1318 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1319 {
1320   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1321 
1322   PetscFunctionBegin;
1323   PetscCall(MatSetRandom(d->A,rctx));
1324   PetscFunctionReturn(0);
1325 }
1326 
/*
   Dense matrices store every entry, so a diagonal entry can never be
   missing; always reports PETSC_FALSE and leaves the optional index
   output d untouched.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1333 
1334 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1335 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1336 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1337 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1338 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1339 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1340 
1341 /* -------------------------------------------------------------------*/
/*
   Virtual function table for MATMPIDENSE. Entries are positional: the
   numbered comments give the slot index in struct _MatOps (see
   petsc/private/matimpl.h); slot order must not change. NULL marks an
   operation this type does not implement.
*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_MPIDense,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};
1491 
/*
   MatMPIDenseSetPreallocation_MPIDense - Type-specific preallocation: builds (or retypes)
   the inner sequential matrix that stores this rank's local rows against ALL global columns.

   If data is non-NULL it is used as the (column-major) storage of the inner matrix;
   otherwise PETSc allocates the storage.
*/
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  /* cannot rebuild the inner matrix while a submatrix view of it is outstanding */
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    /* lazily create the per-process sequential block: local rows x all global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  /* keep the inner type consistent with the outer type (CUDA vs host) */
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1513 
1514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1515 {
1516   Mat            B,C;
1517 
1518   PetscFunctionBegin;
1519   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1520   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1521   PetscCall(MatDestroy(&C));
1522   if (reuse == MAT_REUSE_MATRIX) {
1523     C = *newmat;
1524   } else C = NULL;
1525   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1526   PetscCall(MatDestroy(&B));
1527   if (reuse == MAT_INPLACE_MATRIX) {
1528     PetscCall(MatHeaderReplace(A,&C));
1529   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1534 {
1535   Mat            B,C;
1536 
1537   PetscFunctionBegin;
1538   PetscCall(MatDenseGetLocalMatrix(A,&C));
1539   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1540   if (reuse == MAT_REUSE_MATRIX) {
1541     C = *newmat;
1542   } else C = NULL;
1543   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1544   PetscCall(MatDestroy(&B));
1545   if (reuse == MAT_INPLACE_MATRIX) {
1546     PetscCall(MatHeaderReplace(A,&C));
1547   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #if defined(PETSC_HAVE_ELEMENTAL)
1552 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1553 {
1554   Mat            mat_elemental;
1555   PetscScalar    *v;
1556   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1557 
1558   PetscFunctionBegin;
1559   if (reuse == MAT_REUSE_MATRIX) {
1560     mat_elemental = *newmat;
1561     PetscCall(MatZeroEntries(*newmat));
1562   } else {
1563     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1564     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1565     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1566     PetscCall(MatSetUp(mat_elemental));
1567     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1568   }
1569 
1570   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1571   for (i=0; i<N; i++) cols[i] = i;
1572   for (i=0; i<m; i++) rows[i] = rstart + i;
1573 
1574   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1575   PetscCall(MatDenseGetArray(A,&v));
1576   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1577   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1578   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1579   PetscCall(MatDenseRestoreArray(A,&v));
1580   PetscCall(PetscFree2(rows,cols));
1581 
1582   if (reuse == MAT_INPLACE_MATRIX) {
1583     PetscCall(MatHeaderReplace(A,&mat_elemental));
1584   } else {
1585     *newmat = mat_elemental;
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 #endif
1590 
/* MatDenseGetColumn_MPIDense - Returns a pointer to the local entries of global column
   col; delegates to the inner sequential matrix, which holds all global columns of the
   local rows, so the global column index is also the local one. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A,col,vals));
  PetscFunctionReturn(0);
}
1599 
/* MatDenseRestoreColumn_MPIDense - Releases the column pointer obtained with
   MatDenseGetColumn_MPIDense(); delegates to the inner sequential matrix. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A,vals));
  PetscFunctionReturn(0);
}
1608 
1609 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1610 {
1611   Mat_MPIDense   *mat;
1612   PetscInt       m,nloc,N;
1613 
1614   PetscFunctionBegin;
1615   PetscCall(MatGetSize(inmat,&m,&N));
1616   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1617   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1618     PetscInt sum;
1619 
1620     if (n == PETSC_DECIDE) {
1621       PetscCall(PetscSplitOwnership(comm,&n,&N));
1622     }
1623     /* Check sum(n) = N */
1624     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1625     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1626 
1627     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1628     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1629   }
1630 
1631   /* numeric phase */
1632   mat = (Mat_MPIDense*)(*outmat)->data;
1633   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a CUDA parallel dense matrix back to a
   host parallel dense matrix: binds it to the CPU, strips the CUDA-specific composed
   methods, and converts the inner sequential matrix to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* for MAT_INPLACE_MATRIX the conversion happens directly on M (newmat aliases it) */

  B    = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  /* composing NULL removes the CUDA-only methods installed by the reverse conversion */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* move the inner sequential matrix back to host storage */
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1677 
/*
   MatConvert_MPIDense_MPIDenseCUDA - Converts a host parallel dense matrix to its CUDA
   variant: installs the CUDA-specific composed methods, converts the inner sequential
   matrix to MATSEQDENSECUDA, and unbinds the matrix from the CPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* for MAT_INPLACE_MATRIX the conversion happens directly on M (newmat aliases it) */

  B    = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    /* inner matrix exists: move it to device; values are now valid on both sides */
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1720 #endif
1721 
/*
   MatDenseGetColumnVec_MPIDense - Exposes global column col as a parallel Vec that
   aliases the matrix storage (read-write access). Must be matched by
   MatDenseRestoreColumnVec_MPIDense(); only one column vector may be outstanding.
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable column vector; its array is placed below */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;  /* store col+1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  /* column-major storage: column col starts at offset col*lda */
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1740 
1741 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1742 {
1743   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1747   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1748   a->vecinuse = 0;
1749   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1750   PetscCall(VecResetArray(a->cvec));
1751   *v   = NULL;
1752   PetscFunctionReturn(0);
1753 }
1754 
/*
   MatDenseGetColumnVecRead_MPIDense - Exposes global column col as a parallel Vec that
   aliases the matrix storage, locked for read-only access. Must be matched by
   MatDenseRestoreColumnVecRead_MPIDense(); only one column vector may be outstanding.
*/
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable column vector; its array is placed below */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;  /* store col+1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
  /* column-major storage: column col starts at offset col*lda */
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1774 
1775 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1776 {
1777   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1778 
1779   PetscFunctionBegin;
1780   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1781   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1782   a->vecinuse = 0;
1783   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1784   PetscCall(VecLockReadPop(a->cvec));
1785   PetscCall(VecResetArray(a->cvec));
1786   *v   = NULL;
1787   PetscFunctionReturn(0);
1788 }
1789 
/*
   MatDenseGetColumnVecWrite_MPIDense - Exposes global column col as a parallel Vec that
   aliases the matrix storage, with write-only access semantics. Must be matched by
   MatDenseRestoreColumnVecWrite_MPIDense(); only one column vector may be outstanding.
*/
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable column vector; its array is placed below */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;  /* store col+1 so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  /* column-major storage: column col starts at offset col*lda */
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1808 
1809 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1810 {
1811   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1812 
1813   PetscFunctionBegin;
1814   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1815   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1816   a->vecinuse = 0;
1817   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1818   PetscCall(VecResetArray(a->cvec));
1819   *v   = NULL;
1820   PetscFunctionReturn(0);
1821 }
1822 
/*
   MatDenseGetSubMatrix_MPIDense - Returns a cached matrix view of rows [rbegin,rend) and
   columns [cbegin,cend) of A, backed by the same storage. Must be matched by
   MatDenseRestoreSubMatrix_MPIDense(); only one submatrix view may be outstanding.

   The cached view a->cmat is created lazily and its layouts are rebuilt only when the
   requested shape differs from the cached one.
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm     comm;
  PetscInt     pbegin, pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* clip the requested global row range [rbegin,rend) against this rank's owned rows */
  pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart);
  pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart));
  if (!a->cmat) {
    /* first request: create the cached view matrix with matching type and layouts */
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    /* cached view exists: rebuild its layouts only if the requested shape changed */
    PetscBool same = (PetscBool)(rend-rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      /* global size matches but the local split may differ on some rank: agree globally */
      same = (PetscBool)(pend-pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend-cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the view's inner sequential matrix aliases the corresponding block of a->A */
  PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;  /* store cbegin+1 so that 0 means "not in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1877 
/*
   MatDenseRestoreSubMatrix_MPIDense - Releases the submatrix view obtained with
   MatDenseGetSubMatrix_MPIDense(), returning the inner sequential block to a->A.
*/
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
  *v = NULL;
  PetscFunctionReturn(0);
}
1893 
1894 /*MC
1895    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1896 
1897    Options Database Keys:
1898 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1899 
1900   Level: beginner
1901 
1902 .seealso: `MatCreateDense()`
1903 
1904 M*/
/*
   MatCreate_MPIDense - Constructor for the MATMPIDENSE type: allocates the type-specific
   data, installs the function table, and composes the type-specific methods that the
   generic MatDenseXXX()/MatConvert()/MatProduct machinery dispatches to.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-array access and column/submatrix views */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other types (some guarded by configure-time options) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  /* matrix-matrix product support */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}
1969 
1970 /*MC
1971    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1972 
1973    Options Database Keys:
1974 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1975 
1976   Level: beginner
1977 
.seealso: `MATMPIDENSE`, `MATSEQDENSECUDA`, `MATDENSECUDA`
1979 
1980 M*/
1981 #if defined(PETSC_HAVE_CUDA)
1982 #include <petsc/private/deviceimpl.h>
/*
   MatCreate_MPIDenseCUDA - Constructor for MATMPIDENSECUDA: builds a host MPIDENSE
   matrix and converts it in place to its CUDA variant.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  /* make sure the CUDA device is ready before any device-side setup */
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
1991 #endif
1992 
1993 /*MC
1994    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1995 
1996    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1997    and MATMPIDENSE otherwise.
1998 
1999    Options Database Keys:
2000 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
2001 
2002   Level: beginner
2003 
2004 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
2005 M*/
2006 
2007 /*MC
2008    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
2009 
2010    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
2011    and MATMPIDENSECUDA otherwise.
2012 
2013    Options Database Keys:
2014 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
2015 
2016   Level: beginner
2017 
2018 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
2019 M*/
2020 
2021 /*@C
2022    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
2023 
2024    Collective
2025 
2026    Input Parameters:
+  B - the matrix
2028 -  data - optional location of matrix data.  Set data=NULL for PETSc
2029    to control all matrix memory allocation.
2030 
2031    Notes:
2032    The dense format is fully compatible with standard Fortran 77
2033    storage by columns.
2034 
2035    The data input variable is intended primarily for Fortran programmers
2036    who wish to allocate their own matrix memory space.  Most users should
2037    set data=NULL.
2038 
2039    Level: intermediate
2040 
2041 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2042 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on B (Try: no-op if absent) */
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}
2050 
2051 /*@
2052    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2053    array provided by the user. This is useful to avoid copying an array
2054    into a matrix
2055 
2056    Not Collective
2057 
2058    Input Parameters:
2059 +  mat - the matrix
2060 -  array - the array in column major order
2061 
2062    Notes:
2063    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2064    freed when the matrix is destroyed.
2065 
2066    Level: developer
2067 
2068 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2069 
2070 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; Use: error if the type lacks one */
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2082 
2083 /*@
2084    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2085 
2086    Not Collective
2087 
2088    Input Parameters:
2089 .  mat - the matrix
2090 
2091    Notes:
2092    You can only call this after a call to MatDensePlaceArray()
2093 
2094    Level: developer
2095 
2096 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2097 
2098 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; Use: error if the type lacks one */
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2107 
2108 /*@
2109    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2110    array provided by the user. This is useful to avoid copying an array
2111    into a matrix
2112 
2113    Not Collective
2114 
2115    Input Parameters:
2116 +  mat - the matrix
2117 -  array - the array in column major order
2118 
2119    Notes:
2120    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2121    freed by the user. It will be freed when the matrix is destroyed.
2122 
2123    Level: developer
2124 
2125 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2126 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; Use: error if the type lacks one */
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the CPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2138 
2139 #if defined(PETSC_HAVE_CUDA)
2140 /*@C
2141    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2142    array provided by the user. This is useful to avoid copying an array
2143    into a matrix
2144 
2145    Not Collective
2146 
2147    Input Parameters:
2148 +  mat - the matrix
2149 -  array - the array in column major order
2150 
2151    Notes:
2152    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2153    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2154 
2155    Level: developer
2156 
2157 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2158 @*/
2159 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2160 {
2161   PetscFunctionBegin;
2162   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2163   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2164   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2165   mat->offloadmask = PETSC_OFFLOAD_GPU;
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 /*@C
2170    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2171 
2172    Not Collective
2173 
2174    Input Parameters:
2175 .  mat - the matrix
2176 
2177    Notes:
2178    You can only call this after a call to MatDenseCUDAPlaceArray()
2179 
2180    Level: developer
2181 
2182 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2183 
2184 @*/
2185 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2186 {
2187   PetscFunctionBegin;
2188   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2189   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2190   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 /*@C
2195    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2196    array provided by the user. This is useful to avoid copying an array
2197    into a matrix
2198 
2199    Not Collective
2200 
2201    Input Parameters:
2202 +  mat - the matrix
2203 -  array - the array in column major order
2204 
2205    Notes:
2206    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2207    The memory passed in CANNOT be freed by the user. It will be freed
2208    when the matrix is destroyed. The array should respect the matrix leading dimension.
2209 
2210    Level: developer
2211 
2212 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2213 @*/
2214 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2215 {
2216   PetscFunctionBegin;
2217   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2218   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2219   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2220   mat->offloadmask = PETSC_OFFLOAD_GPU;
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 /*@C
2225    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2226 
2227    Not Collective
2228 
2229    Input Parameters:
2230 .  A - the matrix
2231 
2232    Output Parameters
2233 .  array - the GPU array in column major order
2234 
2235    Notes:
2236    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2237 
2238    Level: developer
2239 
2240 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2241 @*/
2242 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2243 {
2244   PetscFunctionBegin;
2245   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2246   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2247   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 /*@C
2252    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2253 
2254    Not Collective
2255 
2256    Input Parameters:
2257 +  A - the matrix
2258 -  array - the GPU array in column major order
2259 
2260    Notes:
2261 
2262    Level: developer
2263 
2264 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2265 @*/
2266 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2267 {
2268   PetscFunctionBegin;
2269   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2270   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2271   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2272   A->offloadmask = PETSC_OFFLOAD_GPU;
2273   PetscFunctionReturn(0);
2274 }
2275 
2276 /*@C
2277    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2278 
2279    Not Collective
2280 
2281    Input Parameters:
2282 .  A - the matrix
2283 
2284    Output Parameters
2285 .  array - the GPU array in column major order
2286 
2287    Notes:
2288    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2289 
2290    Level: developer
2291 
2292 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2293 @*/
2294 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2295 {
2296   PetscFunctionBegin;
2297   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2298   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2299   PetscFunctionReturn(0);
2300 }
2301 
2302 /*@C
2303    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2304 
2305    Not Collective
2306 
2307    Input Parameters:
2308 +  A - the matrix
2309 -  array - the GPU array in column major order
2310 
2311    Notes:
2312    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2313 
2314    Level: developer
2315 
2316 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2317 @*/
2318 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2319 {
2320   PetscFunctionBegin;
2321   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 /*@C
2326    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2327 
2328    Not Collective
2329 
2330    Input Parameters:
2331 .  A - the matrix
2332 
2333    Output Parameters
2334 .  array - the GPU array in column major order
2335 
2336    Notes:
2337    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2338 
2339    Level: developer
2340 
2341 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2342 @*/
2343 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2344 {
2345   PetscFunctionBegin;
2346   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2347   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2348   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 /*@C
2353    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2354 
2355    Not Collective
2356 
2357    Input Parameters:
2358 +  A - the matrix
2359 -  array - the GPU array in column major order
2360 
2361    Notes:
2362 
2363    Level: developer
2364 
2365 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2366 @*/
2367 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2368 {
2369   PetscFunctionBegin;
2370   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2371   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2372   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2373   A->offloadmask = PETSC_OFFLOAD_GPU;
2374   PetscFunctionReturn(0);
2375 }
2376 #endif
2377 
2378 /*@C
2379    MatCreateDense - Creates a matrix in dense format.
2380 
2381    Collective
2382 
2383    Input Parameters:
2384 +  comm - MPI communicator
2385 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2386 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2387 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2388 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2389 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2390    to control all matrix memory allocation.
2391 
2392    Output Parameter:
2393 .  A - the matrix
2394 
2395    Notes:
2396    The dense format is fully compatible with standard Fortran 77
2397    storage by columns.
2398    Note that, although local portions of the matrix are stored in column-major
2399    order, the matrix is partitioned across MPI ranks by row.
2400 
2401    The data input variable is intended primarily for Fortran programmers
2402    who wish to allocate their own matrix memory space.  Most users should
2403    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2404 
2405    The user MUST specify either the local or global matrix dimensions
2406    (possibly both).
2407 
2408    Level: intermediate
2409 
2410 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2411 @*/
2412 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2413 {
2414   PetscFunctionBegin;
2415   PetscCall(MatCreate(comm,A));
2416   PetscCall(MatSetSizes(*A,m,n,M,N));
2417   PetscCall(MatSetType(*A,MATDENSE));
2418   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2419   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #if defined(PETSC_HAVE_CUDA)
2424 /*@C
2425    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2426 
2427    Collective
2428 
2429    Input Parameters:
2430 +  comm - MPI communicator
2431 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2432 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2433 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2434 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2435 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2436    to control matrix memory allocation.
2437 
2438    Output Parameter:
2439 .  A - the matrix
2440 
2441    Notes:
2442 
2443    Level: intermediate
2444 
2445 .seealso: `MatCreate()`, `MatCreateDense()`
2446 @*/
2447 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2448 {
2449   PetscFunctionBegin;
2450   PetscCall(MatCreate(comm,A));
2451   PetscValidLogicalCollectiveBool(*A,!!data,6);
2452   PetscCall(MatSetSizes(*A,m,n,M,N));
2453   PetscCall(MatSetType(*A,MATDENSECUDA));
2454   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2455   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2456   PetscFunctionReturn(0);
2457 }
2458 #endif
2459 
2460 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2461 {
2462   Mat            mat;
2463   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2464 
2465   PetscFunctionBegin;
2466   *newmat = NULL;
2467   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2468   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2469   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2470   a       = (Mat_MPIDense*)mat->data;
2471 
2472   mat->factortype   = A->factortype;
2473   mat->assembled    = PETSC_TRUE;
2474   mat->preallocated = PETSC_TRUE;
2475 
2476   mat->insertmode = NOT_SET_VALUES;
2477   a->donotstash   = oldmat->donotstash;
2478 
2479   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2480   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2481 
2482   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2483   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2484 
2485   *newmat = mat;
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2490 {
2491   PetscBool      isbinary;
2492 #if defined(PETSC_HAVE_HDF5)
2493   PetscBool      ishdf5;
2494 #endif
2495 
2496   PetscFunctionBegin;
2497   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2498   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2499   /* force binary viewer to load .info file if it has not yet done so */
2500   PetscCall(PetscViewerSetUp(viewer));
2501   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2502 #if defined(PETSC_HAVE_HDF5)
2503   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2504 #endif
2505   if (isbinary) {
2506     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2507 #if defined(PETSC_HAVE_HDF5)
2508   } else if (ishdf5) {
2509     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2510 #endif
2511   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2516 {
2517   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2518   Mat            a,b;
2519 
2520   PetscFunctionBegin;
2521   a    = matA->A;
2522   b    = matB->A;
2523   PetscCall(MatEqual(a,b,flag));
2524   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2529 {
2530   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2531 
2532   PetscFunctionBegin;
2533   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2534   PetscCall(MatDestroy(&atb->atb));
2535   PetscCall(PetscFree(atb));
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2540 {
2541   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2542 
2543   PetscFunctionBegin;
2544   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2545   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2546   PetscCall(PetscFree(abt));
2547   PetscFunctionReturn(0);
2548 }
2549 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Every rank forms the full local product aseq^T * bseq, then the row blocks of the
   global sum are delivered to their owning ranks with MPI_Reduce_scatter. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;             /* reusable context created by the symbolic phase */
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts; /* entries each rank receives in the reduce-scatter */
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;          /* row-ownership ranges of C */

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf: for each destination rank pack its row block,
     column by column, so the reduce-scatter delivers one contiguous piece per rank */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum all atbarray to local values of C */
  /* NOTE(review): writing carray as a contiguous block assumes the LDA of c->A
     equals its local row count — confirm for matrices with a placed array */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2594 
/* Symbolic phase of C = A^T * B for MPI dense matrices: sets up C's sizes/type and
   allocates the send buffer and reduce-scatter counts reused by the numeric phase. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N; /* local rows, global rows, global cols of C */
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  /* A^T*B requires A and B to have identical row layouts */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
  /* keep C's type when it is already some MPI dense flavor, otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&atb));
  cM   = C->rmap->N;
  /* sendbuf holds the entire cM x cN product; recvcounts sizes the reduce-scatter */
  PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C,&ranges));
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2633 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sets up C and allocates the
   communication buffers for the chosen algorithm ("allgatherv" or "cyclic"). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;      /* dedicated tag for the cyclic point-to-point exchange */
  PetscInt              alg;      /* 0 = "allgatherv", 1 = "cyclic" */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    /* two ping-pong buffers sized for the largest local row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    /* buf[0] holds local B^T; buf[1] gathers all of B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2685 
/* Numeric phase of C = A * B^T, cyclic algorithm: each rank's block of B (stored
   transposed, column-major with leading dimension bn) circulates around the ring of
   ranks; at each of the `size` steps every rank multiplies its local A against the
   block currently in hand and writes the corresponding column panel of C. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;      /* reusable context (ping-pong buffers, tag) from the symbolic phase */
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn; /* bn = rows of the B block currently held */
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;   /* row-ownership ranges of B = column panels of C */

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B is already densely packed: send it in place, no copy needed */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* repack local B into buf[0], column-major with leading dimension bn */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring: send to the previous rank, receive from the next */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate ping-pong buffers */
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    /* overlap: the GEMM on the current block runs while the next block is in flight */
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2770 
2771 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2772 {
2773   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2774   Mat_MatTransMultDense *abt;
2775   MPI_Comm              comm;
2776   PetscMPIInt           size;
2777   PetscScalar           *cv, *sendbuf, *recvbuf;
2778   const PetscScalar     *av,*bv;
2779   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2780   PetscScalar           _DOne=1.0,_DZero=0.0;
2781   PetscBLASInt          cm, cn, ck, alda, clda;
2782 
2783   PetscFunctionBegin;
2784   MatCheckProduct(C,3);
2785   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2786   abt  = (Mat_MatTransMultDense*)C->product->data;
2787   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2788   PetscCallMPI(MPI_Comm_size(comm,&size));
2789   PetscCall(MatDenseGetArrayRead(a->A,&av));
2790   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2791   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2792   PetscCall(MatDenseGetLDA(a->A,&i));
2793   PetscCall(PetscBLASIntCast(i,&alda));
2794   PetscCall(MatDenseGetLDA(b->A,&blda));
2795   PetscCall(MatDenseGetLDA(c->A,&i));
2796   PetscCall(PetscBLASIntCast(i,&clda));
2797   /* copy transpose of B into buf[0] */
2798   bn      = B->rmap->n;
2799   sendbuf = abt->buf[0];
2800   recvbuf = abt->buf[1];
2801   for (k = 0, j = 0; j < bn; j++) {
2802     for (i = 0; i < cK; i++, k++) {
2803       sendbuf[k] = bv[i * blda + j];
2804     }
2805   }
2806   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2807   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2808   PetscCall(PetscBLASIntCast(cK,&ck));
2809   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2810   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2811   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2812   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2813   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2814   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2815   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2816   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2821 {
2822   Mat_MatTransMultDense *abt;
2823 
2824   PetscFunctionBegin;
2825   MatCheckProduct(C,3);
2826   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2827   abt = (Mat_MatTransMultDense*)C->product->data;
2828   switch (abt->alg) {
2829   case 1:
2830     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2831     break;
2832   default:
2833     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2834     break;
2835   }
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2840 {
2841   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2842 
2843   PetscFunctionBegin;
2844   PetscCall(MatDestroy(&ab->Ce));
2845   PetscCall(MatDestroy(&ab->Ae));
2846   PetscCall(MatDestroy(&ab->Be));
2847   PetscCall(PetscFree(ab));
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 #if defined(PETSC_HAVE_ELEMENTAL)
2852 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2853 {
2854   Mat_MatMultDense *ab;
2855 
2856   PetscFunctionBegin;
2857   MatCheckProduct(C,3);
2858   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2859   ab   = (Mat_MatMultDense*)C->product->data;
2860   PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2861   PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2862   PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2863   PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2868 {
2869   Mat              Ae,Be,Ce;
2870   Mat_MatMultDense *ab;
2871 
2872   PetscFunctionBegin;
2873   MatCheckProduct(C,4);
2874   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2875   /* check local size of A and B */
2876   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2877     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2878   }
2879 
2880   /* create elemental matrices Ae and Be */
2881   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2882   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2883   PetscCall(MatSetType(Ae,MATELEMENTAL));
2884   PetscCall(MatSetUp(Ae));
2885   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2886 
2887   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2888   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2889   PetscCall(MatSetType(Be,MATELEMENTAL));
2890   PetscCall(MatSetUp(Be));
2891   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2892 
2893   /* compute symbolic Ce = Ae*Be */
2894   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2895   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2896 
2897   /* setup C */
2898   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2899   PetscCall(MatSetType(C,MATDENSE));
2900   PetscCall(MatSetUp(C));
2901 
2902   /* create data structure for reuse Cdense */
2903   PetscCall(PetscNew(&ab));
2904   ab->Ae = Ae;
2905   ab->Be = Be;
2906   ab->Ce = Ce;
2907 
2908   C->product->data    = ab;
2909   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2910   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2911   PetscFunctionReturn(0);
2912 }
2913 #endif
2914 /* ----------------------------------------------- */
2915 #if defined(PETSC_HAVE_ELEMENTAL)
2916 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2917 {
2918   PetscFunctionBegin;
2919   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2920   C->ops->productsymbolic = MatProductSymbolic_AB;
2921   PetscFunctionReturn(0);
2922 }
2923 #endif
2924 
2925 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2926 {
2927   Mat_Product *product = C->product;
2928   Mat         A = product->A,B=product->B;
2929 
2930   PetscFunctionBegin;
2931   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2932     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2933   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2934   C->ops->productsymbolic = MatProductSymbolic_AtB;
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2939 {
2940   Mat_Product    *product = C->product;
2941   const char     *algTypes[2] = {"allgatherv","cyclic"};
2942   PetscInt       alg,nalg = 2;
2943   PetscBool      flg = PETSC_FALSE;
2944 
2945   PetscFunctionBegin;
2946   /* Set default algorithm */
2947   alg = 0; /* default is allgatherv */
2948   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2949   if (flg) {
2950     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2951   }
2952 
2953   /* Get runtime option */
2954   if (product->api_user) {
2955     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2956     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2957     PetscOptionsEnd();
2958   } else {
2959     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2960     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2961     PetscOptionsEnd();
2962   }
2963   if (flg) {
2964     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2965   }
2966 
2967   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2968   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2973 {
2974   Mat_Product    *product = C->product;
2975 
2976   PetscFunctionBegin;
2977   switch (product->type) {
2978 #if defined(PETSC_HAVE_ELEMENTAL)
2979   case MATPRODUCT_AB:
2980     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2981     break;
2982 #endif
2983   case MATPRODUCT_AtB:
2984     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2985     break;
2986   case MATPRODUCT_ABt:
2987     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2988     break;
2989   default:
2990     break;
2991   }
2992   PetscFunctionReturn(0);
2993 }
2994