/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
              matrix that stores the locally owned part of the operator. For a sequential matrix
              it returns the matrix itself.

    Not Collective

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
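
/*
   Usage sketch (illustrative only, not part of the library; M and N are
   hypothetical global sizes):

     Mat A,Aloc;

     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
     PetscCall(MatDenseGetLocalMatrix(A,&Aloc));   // Aloc is the per-rank MATSEQDENSE block
     ...                                           // use Aloc with sequential Mat routines
*/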

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}
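
/*
   Usage sketch (illustrative only): since only locally owned rows may be
   queried, the loop bounds come from the ownership range:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
     for (row = rstart; row < rend; row++) {
       PetscCall(MatGetRow(A,row,&ncols,&cols,&vals));
       ...                                              // inspect this row
       PetscCall(MatRestoreRow(A,row,&ncols,&cols,&vals));
     }
*/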

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
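
/*
   Usage sketch (illustrative only): the diagonal block is the square block of
   columns aligned with this rank's rows; the public entry point is
   MatGetDiagonalBlock():

     Mat D;

     PetscCall(MatGetDiagonalBlock(A,&D));   // borrowed reference, do not destroy
*/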

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheckFalse(idxm[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheckFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
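
/*
   Usage sketch (illustrative only): inserting a 2x2 block in the default
   row-oriented order; off-process entries are stashed here and exchanged at
   assembly time (see MatAssemblyBegin_MPIDense/MatAssemblyEnd_MPIDense below):

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};   // vals[i*2+j] goes to (rows[i],cols[j])

     PetscCall(MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/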

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}
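
/*
   Storage note (illustrative sketch): the local block is stored column-major
   with a leading dimension lda >= local row count, so entry (i,j) of the local
   block lives at array[i + j*lda]:

     PetscInt          lda;
     const PetscScalar *array;

     PetscCall(MatDenseGetLDA(A,&lda));
     PetscCall(MatDenseGetArrayRead(A,&array));
     ...                                          // array[i + j*lda] is local entry (i,j)
     PetscCall(MatDenseRestoreArrayRead(A,&array));
*/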

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}
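
/*
   Usage sketch (illustrative only): temporarily substituting user-provided
   storage for the local block, then restoring the original array; sizing the
   buffer (at least lda times the number of columns) is the caller's job:

     PetscScalar *work;   // hypothetical user buffer

     PetscCall(MatDensePlaceArray(A,work));   // A now reads/writes work
     ...                                      // use A
     PetscCall(MatDenseResetArray(A));        // A uses its original storage again
*/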

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheckFalse(comm_mat != comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK. Currently only submatrices with the same row partitioning as the
     original matrix are supported! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, one column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}
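
/*
   Usage sketch (illustrative only): extracting a submatrix whose rows keep the
   parent's row partitioning, as the routine above requires; here rank 0
   contributes the first five global columns and the others contribute none:

     IS          rows,cols;
     Mat         S;
     PetscMPIInt rank;

     PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));
     PetscCall(MatGetOwnershipIS(A,&rows,NULL));                         // keep the local rows
     PetscCall(ISCreateStride(PETSC_COMM_WORLD,rank ? 0 : 5,0,1,&cols));
     PetscCall(MatCreateSubMatrix(A,rows,cols,MAT_INITIAL_MATRIX,&S));
     ...
     PetscCall(MatDestroy(&S));
     PetscCall(ISDestroy(&cols));
     PetscCall(ISDestroy(&rows));
*/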

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIDense(mat));
  }
  PetscFunctionReturn(0);
}
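
/*
   Assembly pattern (illustrative sketch): every rank must call both phases,
   even ranks that set no values, because the stash exchange is collective:

     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     ...                                                // independent work may overlap here
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/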

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
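
/*
   Usage sketch (illustrative only): zeroing rows to impose Dirichlet-type
   conditions, placing 1.0 on the diagonal and fixing the right-hand side so
   that b[row] = x[row]:

     PetscInt rows[] = {0,7};   // hypothetical global row indices

     PetscCall(MatZeroRows(A,2,rows,1.0,x,b));
*/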

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}
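
/*
   Communication pattern (illustrative sketch): y = A*x first broadcasts the
   full input vector into lvec through the Mvctx star forest, then applies the
   local block; y = A^T*x reverses this, applying the local transpose and then
   summing the partial results with an SF reduce. A typical caller only sees:

     Vec x,y;

     PetscCall(MatCreateVecs(A,&x,&y));   // x: column layout, y: row layout
     PetscCall(VecSet(x,1.0));
     PetscCall(MatMult(A,x,y));
*/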

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  radd = A->rmap->rstart*lda; /* column offset of the diagonal block; use lda, not the local row count, in case they differ */
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}
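
/*
   Usage sketch (illustrative only): extracting the diagonal into a row-layout
   vector obtained from the matrix itself:

     Vec d;

     PetscCall(MatCreateVecs(A,NULL,&d));
     PetscCall(MatGetDiagonal(A,d));
     ...
     PetscCall(VecDestroy(&d));
*/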

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix row by row; this is not the most efficient approach, but it is simple */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_Dense_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheckFalse(s2a != s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheckFalse(s3a != s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j,lda;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* index with the leading dimension; the columns need not be stored contiguously */
      for (j=0; j<mdn->A->cmap->n; j++) {
        v = av + j*lda;
        for (i=0; i<mdn->A->rmap->n; i++) {
          sum += PetscRealPart(PetscConj(v[i])*v[i]);
        }
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      for (j=0; j<mdn->A->cmap->n; j++) {
        v = av + j*lda;
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(v[i]);
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}
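
/*
   Usage sketch (illustrative only): the supported matrix norms here are NORM_1
   (max column sum), NORM_FROBENIUS, and NORM_INFINITY (max row sum); the two
   norm is not available:

     PetscReal fnorm;

     PetscCall(MatNorm(A,NORM_FROBENIUS,&fnorm));
*/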

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}
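
/*
   Usage sketch (illustrative only): building an explicit transpose; with
   MAT_INPLACE_MATRIX the routine above instead replaces A through
   MatHeaderMerge():

     Mat At;

     PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
     ...
     PetscCall(MatDestroy(&At));
*/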

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}
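
/*
   Usage sketch (illustrative only; M and N are hypothetical global sizes): the
   standard creation sequence that reaches MatSetUp_MPIDense() when no explicit
   preallocation call was made:

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N));
     PetscCall(MatSetType(A,MATMPIDENSE));
     PetscCall(MatSetUp(A));   // allocates the local block with PETSc-owned storage
*/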

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
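
/*
   Usage sketch (illustrative only): public wrappers around the column
   reduction kernel above include MatGetColumnNorms(); the array must have one
   entry per global column:

     PetscReal *norms;
     PetscInt  N;

     PetscCall(MatGetSize(A,NULL,&N));
     PetscCall(PetscMalloc1(N,&norms));
     PetscCall(MatGetColumnNorms(A,NORM_2,norms));
     ...
     PetscCall(PetscFree(norms));
*/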

#if defined(PETSC_HAVE_CUDA)
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif
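
/*
   Usage sketch (illustrative only, CUDA builds): wrapping an existing device
   buffer; d_data is a hypothetical device pointer with at least
   local_rows*global_cols scalars in column-major order:

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N));
     PetscCall(MatSetType(A,MATMPIDENSECUDA));
     PetscCall(MatMPIDenseCUDASetPreallocation(A,d_data));   // pass NULL to let PETSc allocate
*/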

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
1397                                         NULL,
1398                                         NULL,
1399                                         NULL,
1400                                 /* 94*/ NULL,
1401                                         NULL,
1402                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1403                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1404                                         NULL,
1405                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1406                                         NULL,
1407                                         NULL,
1408                                         MatConjugate_MPIDense,
1409                                         NULL,
1410                                 /*104*/ NULL,
1411                                         MatRealPart_MPIDense,
1412                                         MatImaginaryPart_MPIDense,
1413                                         NULL,
1414                                         NULL,
1415                                 /*109*/ NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         MatGetColumnVector_MPIDense,
1419                                         MatMissingDiagonal_MPIDense,
1420                                 /*114*/ NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /*119*/ NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /*124*/ NULL,
1431                                         MatGetColumnReductions_MPIDense,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                 /*129*/ NULL,
1436                                         NULL,
1437                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1438                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1439                                         NULL,
1440                                 /*134*/ NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                 /*139*/ NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1451                                 /*145*/ NULL,
1452                                         NULL,
1453                                         NULL
1454 };
1455 
1456 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1457 {
1458   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1459   PetscBool      iscuda;
1460 
1461   PetscFunctionBegin;
1462   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1463   PetscCall(PetscLayoutSetUp(mat->rmap));
1464   PetscCall(PetscLayoutSetUp(mat->cmap));
1465   if (!a->A) {
1466     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1467     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1468     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1469   }
1470   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1471   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1472   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1473   mat->preallocated = PETSC_TRUE;
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1478 {
1479   Mat            B,C;
1480 
1481   PetscFunctionBegin;
1482   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1483   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1484   PetscCall(MatDestroy(&C));
1485   if (reuse == MAT_REUSE_MATRIX) {
1486     C = *newmat;
1487   } else C = NULL;
1488   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1489   PetscCall(MatDestroy(&B));
1490   if (reuse == MAT_INPLACE_MATRIX) {
1491     PetscCall(MatHeaderReplace(A,&C));
1492   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1497 {
1498   Mat            B,C;
1499 
1500   PetscFunctionBegin;
1501   PetscCall(MatDenseGetLocalMatrix(A,&C));
1502   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1503   if (reuse == MAT_REUSE_MATRIX) {
1504     C = *newmat;
1505   } else C = NULL;
1506   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1507   PetscCall(MatDestroy(&B));
1508   if (reuse == MAT_INPLACE_MATRIX) {
1509     PetscCall(MatHeaderReplace(A,&C));
1510   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1511   PetscFunctionReturn(0);
1512 }
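
/*
   Usage sketch (illustrative only, not part of this file): user code reaches
   the two converters above through the generic MatConvert(). Assuming A is an
   assembled MATMPIDENSE matrix:

     Mat B;
     PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B));
     PetscCall(MatConvert(B,MATMPIDENSE,MAT_INPLACE_MATRIX,&B));

   The first call creates a new AIJ copy of A; the second converts that copy
   back to dense in place, destroying the AIJ representation.
*/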

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A,col,vals));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A,vals));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
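
/*
   Usage sketch (illustrative only): the routine above is normally reached
   through the public MatCreateMPIMatConcatenateSeqMat(). Assuming each rank
   holds a sequential dense matrix "seq" with the same global column count:

     Mat mpi;
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpi));

   The per-rank matrices are stacked by rows into one distributed MATMPIDENSE
   matrix; PETSC_DECIDE lets PETSc pick the local column ownership.
*/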

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B    = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
    PetscCall(MatSetUpMultiply_MPIDense(B));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B    = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    PetscCall(MatSetUpMultiply_MPIDense(B));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
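
/*
   Usage sketch (illustrative only): host code typically triggers the two
   converters above via MatConvert() when moving a matrix to or from the GPU.
   Assuming A is an assembled MATMPIDENSE matrix:

     PetscCall(MatConvert(A,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&A));
     ... GPU-resident operations on A ...
     PetscCall(MatConvert(A,MATMPIDENSE,MAT_INPLACE_MATRIX,&A));
*/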
#endif

PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}
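
/*
   Usage sketch (illustrative only): the get/restore pair above backs the
   public MatDenseGetColumnVec()/MatDenseRestoreColumnVec(). Assuming A is a
   distributed dense matrix and col a valid global column index:

     Vec v;
     PetscCall(MatDenseGetColumnVec(A,col,&v));
     PetscCall(VecScale(v,2.0));
     PetscCall(MatDenseRestoreColumnVec(A,col,&v));

   Only one column may be checked out at a time; the restore call is mandatory
   before any other access to A.
*/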

PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    setup = PETSC_TRUE;
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else if (cend-cbegin != a->cmat->cmap->N) {
    setup = PETSC_TRUE;
    PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
    PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
  if (setup) { /* do we really need this? */
    PetscCall(MatSetUpMultiply_MPIDense(a->cmat));
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;
  *v = a->cmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
  *v   = NULL;
  PetscFunctionReturn(0);
}
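
/*
   Usage sketch (illustrative only): the pair above backs the public
   MatDenseGetSubMatrix()/MatDenseRestoreSubMatrix(), which expose columns
   [cbegin,cend) of A as a matrix sharing A's storage. Assuming A has at
   least 4 global columns:

     Mat Asub;
     PetscCall(MatDenseGetSubMatrix(A,1,4,&Asub));
     PetscCall(MatScale(Asub,0.5));
     PetscCall(MatDenseRestoreSubMatrix(A,&Asub));
*/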

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateDense()

M*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATMPIDENSE, MATSEQDENSECUDA, MatCreateDenseCUDA()

M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}
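
/*
   Example (a minimal sketch): creating a distributed dense matrix and letting
   PETSc allocate the storage by passing data=NULL. The sizes are illustrative.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,50));
     PetscCall(MatSetType(A,MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
*/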

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
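
/*
   Example (a minimal sketch, user-managed storage assumed): temporarily
   backing a dense matrix with a caller-owned buffer, then restoring the
   original storage. "myarray" is hypothetical and must be column major with
   at least local-rows * global-columns entries.

     PetscCall(MatDensePlaceArray(A,myarray));
     ... use A; its entries now live in myarray ...
     PetscCall(MatDenseResetArray(A));
*/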

/*@
   MatDenseResetArray - Resets the matrix array to the array that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
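
/*
   Example (a minimal sketch): handing PETSc ownership of a freshly allocated
   buffer. Unlike MatDensePlaceArray(), the matrix keeps and eventually frees
   this memory, so it must come from PetscMalloc(). Here m and N stand for
   the local row and global column counts (assumptions for illustration).

     PetscScalar *newarray;
     PetscCall(PetscMalloc1(m*N,&newarray));
     ... fill newarray in column-major order ...
     PetscCall(MatDenseReplaceArray(A,newarray));
*/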

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1); /* added for consistency with the other CUDA array accessors */
  PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
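
/*
   Usage sketch (illustrative only): read-modify-write access to the device
   buffer through the accessors above. "kernel_scale" stands for any
   hypothetical user CUDA kernel; m, n, and lda are the local row count,
   global column count, and the leading dimension from MatDenseGetLDA().

     PetscScalar *d_a;
     PetscCall(MatDenseCUDAGetArray(A,&d_a));
     kernel_scale<<<grid,block>>>(d_a,m,n,lda);
     PetscCall(MatDenseCUDARestoreArray(A,&d_a));
*/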
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    PetscCall(MatSetType(*A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(*A,data));
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm));
    if (havedata) {  /* user provided data array, so no need to assemble */
      PetscCall(MatSetUpMultiply_MPIDense(*A));
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    PetscCall(MatSetType(*A,MATSEQDENSE));
    PetscCall(MatSeqDenseSetPreallocation(*A,data));
  }
  PetscFunctionReturn(0);
}
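
/*
   Example (a minimal sketch): the common calling sequence, with PETSc
   choosing the row distribution and allocating the storage. M and N are
   hypothetical global sizes.

     Mat A;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
     PetscCall(MatSetValue(A,0,0,1.0,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/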

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscMPIInt    size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    PetscCall(MatSetType(*A,MATMPIDENSECUDA));
    PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
    if (data) {  /* user provided data array, so no need to assemble */
      PetscCall(MatSetUpMultiply_MPIDense(*A));
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    PetscCall(MatSetType(*A,MATSEQDENSECUDA));
    PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
  PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));

  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatSetUpMultiply_MPIDense(mat));

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
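
/*
   Usage sketch (illustrative only): loading a dense matrix from a PETSc
   binary file through the loader above. "matrix.dat" is a hypothetical file
   written earlier with MatView() on a binary viewer.

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetType(A,MATMPIDENSE));
     PetscCall(MatLoad(A,viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/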

static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  PetscCall(MatEqual(a,b,&flg));
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum the partial products atbarray into the local rows of C */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
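
/*
   Usage sketch (illustrative only): the numeric routine above is reached
   through the product API, e.g.

     Mat C;
     PetscCall(MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));

   Each rank first forms the local product of its row blocks of A and B, then
   the partial results are summed and distributed across ranks with
   MPI_Reduce_scatter(), matching C's row ownership.
*/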
2572 
2573 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2574 {
2575   MPI_Comm              comm;
2576   PetscMPIInt           size;
2577   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2578   Mat_TransMatMultDense *atb;
2579   PetscBool             cisdense;
2580   PetscInt              i;
2581   const PetscInt        *ranges;
2582 
2583   PetscFunctionBegin;
2584   MatCheckProduct(C,3);
2585   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2586   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2587   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2588     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2589   }
2590 
2591   /* create matrix product C */
2592   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2593   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2594   if (!cisdense) {
2595     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2596   }
2597   PetscCall(MatSetUp(C));
2598 
2599   /* create data structure for reuse C */
2600   PetscCallMPI(MPI_Comm_size(comm,&size));
2601   PetscCall(PetscNew(&atb));
2602   cM   = C->rmap->N;
2603   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2604   PetscCall(MatGetOwnershipRanges(C,&ranges));
2605   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2606 
2607   C->product->data    = atb;
2608   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2609   PetscFunctionReturn(0);
2610 }
2611 
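/*
   A usage sketch for the two routines above (assuming A and B are MATMPIDENSE
   with identical row layouts): the first call triggers the symbolic phase,
   which allocates sendbuf/recvcounts; later calls with MAT_REUSE_MATRIX go
   straight to the numeric kernel and reuse them.
*/
#if 0
Mat C = NULL;
PetscCall(MatTransposeMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); /* C = A^T B */
/* ... update entries of A or B without changing their layouts ... */
PetscCall(MatTransposeMatMult(A, B, MAT_REUSE_MATRIX, PETSC_DEFAULT, &C));
PetscCall(MatDestroy(&C));
#endif
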
2612 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2613 {
2614   MPI_Comm              comm;
2615   PetscMPIInt           i, size;
2616   PetscInt              maxRows, bufsiz;
2617   PetscMPIInt           tag;
2618   PetscInt              alg;
2619   Mat_MatTransMultDense *abt;
2620   Mat_Product           *product = C->product;
2621   PetscBool             flg;
2622 
2623   PetscFunctionBegin;
2624   MatCheckProduct(C,4);
2625   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2626   /* check local size of A and B */
2627   PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);
2628 
2629   PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
2630   alg  = flg ? 0 : 1;
2631 
2632   /* setup matrix product C */
2633   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
2634   PetscCall(MatSetType(C,MATMPIDENSE));
2635   PetscCall(MatSetUp(C));
2636   PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));
2637 
2638   /* create data structures for reuse when C is recomputed */
2639   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2640   PetscCallMPI(MPI_Comm_size(comm,&size));
2641   PetscCall(PetscNew(&abt));
2642   abt->tag = tag;
2643   abt->alg = alg;
2644   switch (alg) {
2645   case 1: /* alg: "cyclic" */
2646     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2647     bufsiz = A->cmap->N * maxRows;
2648     PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
2649     break;
2650   default: /* alg: "allgatherv" */
2651     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2652     PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
2653     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2654     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2655     break;
2656   }
2657 
2658   C->product->data    = abt;
2659   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2660   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2661   PetscFunctionReturn(0);
2662 }
2663 
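/*
   A self-contained sketch of the allgatherv bookkeeping above. Assuming three
   ranks owning 2, 3 and 1 rows of B and A->cmap->N == 4, each rank's transposed
   block of B occupies (local rows) * N scalars in the gathered buffer, so
   recvdispls = {0, 8, 20, 24} and recvcounts = {8, 12, 4}.
*/
#if 0
#include <stdio.h>

int main(void)
{
  int range[4] = {0, 2, 5, 6}; /* stand-in for B->rmap->range with 3 ranks */
  int N = 4, size = 3;
  int recvdispls[4], recvcounts[3];

  for (int i = 0; i <= size; i++) recvdispls[i] = range[i] * N;
  for (int i = 0; i < size; i++) recvcounts[i] = recvdispls[i + 1] - recvdispls[i];
  for (int i = 0; i < size; i++) printf("rank %d: count %d displ %d\n", i, recvcounts[i], recvdispls[i]);
  return 0;
}
#endif
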
2664 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2665 {
2666   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2667   Mat_MatTransMultDense *abt;
2668   MPI_Comm              comm;
2669   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2670   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2671   PetscInt              i,cK=A->cmap->N,k,j,bn;
2672   PetscScalar           _DOne=1.0,_DZero=0.0;
2673   const PetscScalar     *av,*bv;
2674   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2675   MPI_Request           reqs[2];
2676   const PetscInt        *ranges;
2677 
2678   PetscFunctionBegin;
2679   MatCheckProduct(C,3);
2680   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2681   abt  = (Mat_MatTransMultDense*)C->product->data;
2682   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2683   PetscCallMPI(MPI_Comm_rank(comm,&rank));
2684   PetscCallMPI(MPI_Comm_size(comm,&size));
2685   PetscCall(MatDenseGetArrayRead(a->A,&av));
2686   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2687   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2688   PetscCall(MatDenseGetLDA(a->A,&i));
2689   PetscCall(PetscBLASIntCast(i,&alda));
2690   PetscCall(MatDenseGetLDA(b->A,&i));
2691   PetscCall(PetscBLASIntCast(i,&blda));
2692   PetscCall(MatDenseGetLDA(c->A,&i));
2693   PetscCall(PetscBLASIntCast(i,&clda));
2694   PetscCall(MatGetOwnershipRanges(B,&ranges));
2695   bn   = B->rmap->n;
2696   if (blda == bn) {
2697     sendbuf = (PetscScalar*)bv;
2698   } else {
2699     sendbuf = abt->buf[0];
2700     for (k = 0, i = 0; i < cK; i++) {
2701       for (j = 0; j < bn; j++, k++) {
2702         sendbuf[k] = bv[i * blda + j];
2703       }
2704     }
2705   }
2706   if (size > 1) {
2707     sendto = (rank + size - 1) % size;
2708     recvfrom = (rank + size + 1) % size;
2709   } else {
2710     sendto = recvfrom = 0;
2711   }
2712   PetscCall(PetscBLASIntCast(cK,&ck));
2713   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2714   recvisfrom = rank;
2715   for (i = 0; i < size; i++) {
2716     /* the previous shift has completed, so both buffers can be read/modified */
2717     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2718     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2719 
2720     if (nextrecvisfrom != rank) {
2721       /* start the cyclic shift from sendbuf into recvbuf (which becomes the next sendbuf) */
2722       sendsiz = cK * bn;
2723       recvsiz = cK * nextbn;
2724       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2725       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2726       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2727     }
2728 
2729     /* local aseq * sendbuf^T */
2730     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2731     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2732 
2733     if (nextrecvisfrom != rank) {
2734       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2735       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2736     }
2737     bn = nextbn;
2738     recvisfrom = nextrecvisfrom;
2739     sendbuf = recvbuf;
2740   }
2741   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2742   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2743   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2744   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2745   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2746   PetscFunctionReturn(0);
2747 }
2748 
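/*
   A stripped-down sketch (not PETSc code) of the ring schedule above: each of
   the size steps overlaps a nonblocking shift of the current block (to rank-1,
   from rank+1) with the local GEMM on the block just received; the two buffers
   alternate roles so computation never touches a message in flight. A uniform
   block size n is assumed for brevity, and buf[0] must hold the local block on
   entry.
*/
#if 0
#include <mpi.h>

static void ring_shift(double *buf[2], int n, MPI_Comm comm)
{
  int         rank, size;
  MPI_Request reqs[2];
  double     *work = buf[0]; /* block currently being consumed */

  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  for (int step = 0; step < size; step++) {
    double *next = (step & 1) ? buf[0] : buf[1];
    int     last = (step == size - 1);

    if (!last) { /* post the shift before computing on work */
      MPI_Isend(work, n, MPI_DOUBLE, (rank + size - 1) % size, 0, comm, &reqs[0]);
      MPI_Irecv(next, n, MPI_DOUBLE, (rank + 1) % size, 0, comm, &reqs[1]);
    }
    /* ... GEMM consuming work goes here, writing the columns owned by the
       block's origin rank, exactly as in the routine above ... */
    if (!last) MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    work = next;
  }
}
#endif
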
2749 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2750 {
2751   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2752   Mat_MatTransMultDense *abt;
2753   MPI_Comm              comm;
2754   PetscMPIInt           size;
2755   PetscScalar           *cv, *sendbuf, *recvbuf;
2756   const PetscScalar     *av,*bv;
2757   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2758   PetscScalar           _DOne=1.0,_DZero=0.0;
2759   PetscBLASInt          cm, cn, ck, alda, clda;
2760 
2761   PetscFunctionBegin;
2762   MatCheckProduct(C,3);
2763   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2764   abt  = (Mat_MatTransMultDense*)C->product->data;
2765   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2766   PetscCallMPI(MPI_Comm_size(comm,&size));
2767   PetscCall(MatDenseGetArrayRead(a->A,&av));
2768   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2769   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2770   PetscCall(MatDenseGetLDA(a->A,&i));
2771   PetscCall(PetscBLASIntCast(i,&alda));
2772   PetscCall(MatDenseGetLDA(b->A,&blda));
2773   PetscCall(MatDenseGetLDA(c->A,&i));
2774   PetscCall(PetscBLASIntCast(i,&clda));
2775   /* copy transpose of B into buf[0] */
2776   bn      = B->rmap->n;
2777   sendbuf = abt->buf[0];
2778   recvbuf = abt->buf[1];
2779   for (k = 0, j = 0; j < bn; j++) {
2780     for (i = 0; i < cK; i++, k++) {
2781       sendbuf[k] = bv[i * blda + j];
2782     }
2783   }
2784   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2785   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2786   PetscCall(PetscBLASIntCast(cK,&ck));
2787   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2788   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2789   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2790   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2792   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2793   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2794   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2795   PetscFunctionReturn(0);
2796 }
2797 
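/*
   A tiny sketch of the final multiply above: after the MPI_Allgatherv, recvbuf
   holds B^T in column-major order with leading dimension k (the global column
   count of A), so a single "N","N" GEMM produces this rank's rows of C = A*B^T.
   A Fortran BLAS with trailing underscore is assumed; names are illustrative.
*/
#if 0
extern void dgemm_(const char *transa, const char *transb, const int *m, const int *n,
                   const int *k, const double *alpha, const double *a, const int *lda,
                   const double *b, const int *ldb, const double *beta, double *c,
                   const int *ldc);

static void local_abt(int m, int n, int k, const double *a, int lda,
                      const double *bt /* k x n, column-major */, double *c, int ldc)
{
  const double one = 1.0, zero = 0.0;
  dgemm_("N", "N", &m, &n, &k, &one, a, &lda, bt, &k, &zero, c, &ldc);
}
#endif
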
2798 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2799 {
2800   Mat_MatTransMultDense *abt;
2801 
2802   PetscFunctionBegin;
2803   MatCheckProduct(C,3);
2804   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2805   abt = (Mat_MatTransMultDense*)C->product->data;
2806   switch (abt->alg) {
2807   case 1:
2808     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2809     break;
2810   default:
2811     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2812     break;
2813   }
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2818 {
2819   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2820 
2821   PetscFunctionBegin;
2822   PetscCall(MatDestroy(&ab->Ce));
2823   PetscCall(MatDestroy(&ab->Ae));
2824   PetscCall(MatDestroy(&ab->Be));
2825   PetscCall(PetscFree(ab));
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 #if defined(PETSC_HAVE_ELEMENTAL)
2830 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2831 {
2832   Mat_MatMultDense *ab;
2833 
2834   PetscFunctionBegin;
2835   MatCheckProduct(C,3);
2836   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2837   ab   = (Mat_MatMultDense*)C->product->data;
2838   PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2839   PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2840   PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2841   PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2842   PetscFunctionReturn(0);
2843 }
2844 
2845 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2846 {
2847   Mat              Ae,Be,Ce;
2848   Mat_MatMultDense *ab;
2849 
2850   PetscFunctionBegin;
2851   MatCheckProduct(C,4);
2852   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2853   /* check local size of A and B */
2854   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2855     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2856   }
2857 
2858   /* create elemental matrices Ae and Be */
2859   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2860   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2861   PetscCall(MatSetType(Ae,MATELEMENTAL));
2862   PetscCall(MatSetUp(Ae));
2863   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2864 
2865   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2866   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2867   PetscCall(MatSetType(Be,MATELEMENTAL));
2868   PetscCall(MatSetUp(Be));
2869   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2870 
2871   /* compute symbolic Ce = Ae*Be */
2872   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2873   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2874 
2875   /* setup C */
2876   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2877   PetscCall(MatSetType(C,MATDENSE));
2878   PetscCall(MatSetUp(C));
2879 
2880   /* create data structure for reuse when C is recomputed */
2881   PetscCall(PetscNew(&ab));
2882   ab->Ae = Ae;
2883   ab->Be = Be;
2884   ab->Ce = Ce;
2885 
2886   C->product->data    = ab;
2887   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2888   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2889   PetscFunctionReturn(0);
2890 }
2891 #endif
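
/*
   A usage sketch for the Elemental path above (requires PETSc configured with
   --with-elemental): MATELEMENTAL stores the matrix in a 2D block-cyclic
   distribution, which is what makes the parallel dense GEMM scalable; the
   product round-trips through that format and converts the result back.
*/
#if 0
Mat C;
PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C)); /* computes Ae*Be internally */
PetscCall(MatDestroy(&C));
#endif
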
2892 /* ----------------------------------------------- */
2893 #if defined(PETSC_HAVE_ELEMENTAL)
2894 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2895 {
2896   PetscFunctionBegin;
2897   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2898   C->ops->productsymbolic = MatProductSymbolic_AB;
2899   PetscFunctionReturn(0);
2900 }
2901 #endif
2902 
2903 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2904 {
2905   Mat_Product *product = C->product;
2906   Mat         A = product->A,B=product->B;
2907 
2908   PetscFunctionBegin;
2909   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2910     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2911   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2912   C->ops->productsymbolic = MatProductSymbolic_AtB;
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2917 {
2918   PetscErrorCode ierr;
2919   Mat_Product    *product = C->product;
2920   const char     *algTypes[2] = {"allgatherv","cyclic"};
2921   PetscInt       alg,nalg = 2;
2922   PetscBool      flg = PETSC_FALSE;
2923 
2924   PetscFunctionBegin;
2925   /* Set default algorithm */
2926   alg = 0; /* default is allgatherv */
2927   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2928   if (flg) {
2929     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2930   }
2931 
2932   /* Get runtime option */
2933   if (product->api_user) {
2934     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");PetscCall(ierr);
2935     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2936     ierr = PetscOptionsEnd();PetscCall(ierr);
2937   } else {
2938     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");PetscCall(ierr);
2939     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2940     ierr = PetscOptionsEnd();PetscCall(ierr);
2941   }
2942   if (flg) {
2943     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2944   }
2945 
2946   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2947   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2948   PetscFunctionReturn(0);
2949 }
2950 
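/*
   A sketch of selecting the ABt algorithm handled above, either from the
   command line,
       -matmattransmult_mpidense_mpidense_via cyclic   (MatMatTransposeMult path)
       -mat_product_algorithm cyclic                   (MatProduct path)
   or programmatically before the symbolic phase:
*/
#if 0
Mat C;
PetscCall(MatProductCreate(A, B, NULL, &C));
PetscCall(MatProductSetType(C, MATPRODUCT_ABt));
PetscCall(MatProductSetAlgorithm(C, "cyclic"));
PetscCall(MatProductSetFromOptions(C));
#endif
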
2951 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2952 {
2953   Mat_Product    *product = C->product;
2954 
2955   PetscFunctionBegin;
2956   switch (product->type) {
2957 #if defined(PETSC_HAVE_ELEMENTAL)
2958   case MATPRODUCT_AB:
2959     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2960     break;
2961 #endif
2962   case MATPRODUCT_AtB:
2963     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2964     break;
2965   case MATPRODUCT_ABt:
2966     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2967     break;
2968   default:
2969     break;
2970   }
2971   PetscFunctionReturn(0);
2972 }
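
/*
   A usage sketch of the dispatcher above through the generic MatProduct API
   (assuming A and B are MATMPIDENSE with compatible layouts):
*/
#if 0
Mat C;
PetscCall(MatProductCreate(A, B, NULL, &C));
PetscCall(MatProductSetType(C, MATPRODUCT_AtB)); /* routed to ..._AtB above */
PetscCall(MatProductSetFromOptions(C));
PetscCall(MatProductSymbolic(C));
PetscCall(MatProductNumeric(C));
PetscCall(MatDestroy(&C));
#endif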
2973