xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision cea3dcb8a72de384585ca95e01d24484198b5d39)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
56 
57   PetscFunctionBegin;
58   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
59   lrow = row - rstart;
60   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
61   PetscFunctionReturn(0);
62 }
63 
64 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
65 {
66   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
67   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
68 
69   PetscFunctionBegin;
70   PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
71   lrow = row - rstart;
72   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
73   PetscFunctionReturn(0);
74 }
75 
/* Return the square diagonal block of A (this rank's rows and the matching columns).
   The block is created on first request, cached as a composed object on A, and
   shares storage with the inner sequential matrix mdn->A. */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    /* B aliases columns [rstart, rstart+m) of the local array (no copy is made);
       NOTE(review): the offset m*rstart assumes the local leading dimension equals m
       -- confirm for matrices configured with a larger lda */
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    /* the composed reference keeps B alive; MatDestroy below drops only our local reference */
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}
106 
/* Insert or add values: locally owned rows go straight into the local dense matrix,
   off-process rows are stashed for communication during MatAssemblyBegin/End. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) { /* locally owned row */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* row-oriented input: row i of v is contiguous */
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        /* column-oriented input: entry (i,j) of v is v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: record in the stash and mark the matrix as needing assembly */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
139 
140 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
141 {
142   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
143   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
144 
145   PetscFunctionBegin;
146   for (i=0; i<m; i++) {
147     if (idxm[i] < 0) continue; /* negative row */
148     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
149     if (idxm[i] >= rstart && idxm[i] < rend) {
150       row = idxm[i] - rstart;
151       for (j=0; j<n; j++) {
152         if (idxn[j] < 0) continue; /* negative column */
153         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
154         PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
155       }
156     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
157   }
158   PetscFunctionReturn(0);
159 }
160 
161 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
162 {
163   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
164 
165   PetscFunctionBegin;
166   PetscCall(MatDenseGetLDA(a->A,lda));
167   PetscFunctionReturn(0);
168 }
169 
/* Set the leading dimension of the local storage, creating the inner sequential
   matrix lazily if it does not exist yet (so the lda is recorded before preallocation). */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    /* the local matrix holds all N (global) columns of this rank's rows */
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    /* mirror the host/device flavor of the parallel matrix */
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}
189 
190 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
191 {
192   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
193 
194   PetscFunctionBegin;
195   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
196   PetscCall(MatDenseGetArray(a->A,array));
197   PetscFunctionReturn(0);
198 }
199 
200 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
201 {
202   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
203 
204   PetscFunctionBegin;
205   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
206   PetscCall(MatDenseGetArrayRead(a->A,array));
207   PetscFunctionReturn(0);
208 }
209 
210 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
211 {
212   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
213 
214   PetscFunctionBegin;
215   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
216   PetscCall(MatDenseGetArrayWrite(a->A,array));
217   PetscFunctionReturn(0);
218 }
219 
220 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
221 {
222   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
223 
224   PetscFunctionBegin;
225   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
226   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
227   PetscCall(MatDensePlaceArray(a->A,array));
228   PetscFunctionReturn(0);
229 }
230 
231 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
232 {
233   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
234 
235   PetscFunctionBegin;
236   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
237   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
238   PetscCall(MatDenseResetArray(a->A));
239   PetscFunctionReturn(0);
240 }
241 
242 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
243 {
244   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
245 
246   PetscFunctionBegin;
247   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
248   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
249   PetscCall(MatDenseReplaceArray(a->A,array));
250   PetscFunctionReturn(0);
251 }
252 
/* Extract the submatrix with rows isrow and columns iscol; all ranks get all
   requested columns (iscol is gathered), rows keep the parent's partitioning. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every rank needs the full list of requested columns */
  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    /* av points at the start of source column icol[i]; the destination is
       written sequentially, so the new matrix is filled column-major */
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}
321 
322 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
323 {
324   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
325 
326   PetscFunctionBegin;
327   PetscCall(MatDenseRestoreArray(a->A,array));
328   PetscFunctionReturn(0);
329 }
330 
331 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
332 {
333   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
334 
335   PetscFunctionBegin;
336   PetscCall(MatDenseRestoreArrayRead(a->A,array));
337   PetscFunctionReturn(0);
338 }
339 
340 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
341 {
342   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
343 
344   PetscFunctionBegin;
345   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
346   PetscFunctionReturn(0);
347 }
348 
/* Start assembly: kick off communication of stashed off-process entries. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* nothing to communicate when off-process entries were never stashed */
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}
362 
/* Finish assembly: receive stashed entries from other ranks, insert them row by
   row into the local matrix, then assemble the inner sequential matrix. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  /* assemble the per-rank local matrix as well */
  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}
396 
397 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
398 {
399   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
400 
401   PetscFunctionBegin;
402   PetscCall(MatZeroEntries(l->A));
403   PetscFunctionReturn(0);
404 }
405 
/* Zero the listed rows (global indices; each rank passes any subset), optionally
   set the diagonal to 'diag' and adjust the rhs b so x stays a solution there. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    /* b_i = diag * x_i on each zeroed row so the system still has solution x there */
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  /* zero the rows locally with zero diagonal; the diagonal is reinserted globally below */
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
437 
438 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
439 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
440 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
441 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
442 
/* y = A x: broadcast the needed entries of x into the local work vector through
   the communication SF (memtype-aware, so host and device arrays both work),
   then apply the local matrix. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the SF is built lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}
461 
/* z = y + A x: same communication pattern as MatMult_MPIDense, then the local
   multiply-add. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the SF is built lazily on first use */
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}
480 
/* y = A^T x: apply the local transpose multiply into the work vector, then sum
   the per-rank contributions into y with a memtype-aware SF reduction. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the SF is built lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* the reduction below adds into yy, so it must start from zero */
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}
500 
/* z = y + A^T x: seed z with y, apply the local transpose multiply, then sum
   the per-rank contributions into z with a memtype-aware SF reduction. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* the SF is built lazily on first use */
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  /* the reduction adds into zz, so start it at yy */
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}
520 
521 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
522 {
523   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
524   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
525   PetscScalar       *x,zero = 0.0;
526   const PetscScalar *av;
527 
528   PetscFunctionBegin;
529   PetscCall(VecSet(v,zero));
530   PetscCall(VecGetArray(v,&x));
531   PetscCall(VecGetSize(v,&n));
532   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
533   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
534   radd = A->rmap->rstart*m;
535   PetscCall(MatDenseGetArrayRead(a->A,&av));
536   PetscCall(MatDenseGetLDA(a->A,&lda));
537   for (i=0; i<len; i++) {
538     x[i] = av[radd + i*lda + i];
539   }
540   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
541   PetscCall(VecRestoreArray(v,&x));
542   PetscFunctionReturn(0);
543 }
544 
/* Destroy the MPI dense matrix: release the stash, the inner sequential matrix,
   communication objects, and un-compose every method override installed at creation. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  /* destroying with an outstanding view would leave dangling references */
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  /* remove every composed method so the object can be retyped cleanly */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined (PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
  PetscFunctionReturn(0);
}
603 
604 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
605 
606 #include <petscdraw.h>
607 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
608 {
609   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
610   PetscMPIInt       rank;
611   PetscViewerType   vtype;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
618   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
619   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
620   if (iascii) {
621     PetscCall(PetscViewerGetType(viewer,&vtype));
622     PetscCall(PetscViewerGetFormat(viewer,&format));
623     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
624       MatInfo info;
625       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
626       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
627       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
628       PetscCall(PetscViewerFlush(viewer));
629       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
630       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
631       PetscFunctionReturn(0);
632     } else if (format == PETSC_VIEWER_ASCII_INFO) {
633       PetscFunctionReturn(0);
634     }
635   } else if (isdraw) {
636     PetscDraw draw;
637     PetscBool isnull;
638 
639     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
640     PetscCall(PetscDrawIsNull(draw,&isnull));
641     if (isnull) PetscFunctionReturn(0);
642   }
643 
644   {
645     /* assemble the entire matrix onto first processor. */
646     Mat         A;
647     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
648     PetscInt    *cols;
649     PetscScalar *vals;
650 
651     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
652     if (rank == 0) {
653       PetscCall(MatSetSizes(A,M,N,M,N));
654     } else {
655       PetscCall(MatSetSizes(A,0,0,M,N));
656     }
657     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
658     PetscCall(MatSetType(A,MATMPIDENSE));
659     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
660     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
661 
662     /* Copy the matrix ... This isn't the most efficient means,
663        but it's quick for now */
664     A->insertmode = INSERT_VALUES;
665 
666     row = mat->rmap->rstart;
667     m   = mdn->A->rmap->n;
668     for (i=0; i<m; i++) {
669       PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
670       PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
671       PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
672       row++;
673     }
674 
675     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
676     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
677     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
678     if (rank == 0) {
679       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
680       PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
681     }
682     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
683     PetscCall(PetscViewerFlush(viewer));
684     PetscCall(MatDestroy(&A));
685   }
686   PetscFunctionReturn(0);
687 }
688 
689 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
690 {
691   PetscBool      iascii,isbinary,isdraw,issocket;
692 
693   PetscFunctionBegin;
694   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
695   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
696   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
697   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
698 
699   if (iascii || issocket || isdraw) {
700     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
701   } else if (isbinary) {
702     PetscCall(MatView_Dense_Binary(mat,viewer));
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
708 {
709   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
710   Mat            mdn  = mat->A;
711   PetscLogDouble isend[5],irecv[5];
712 
713   PetscFunctionBegin;
714   info->block_size = 1.0;
715 
716   PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));
717 
718   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
719   isend[3] = info->memory;  isend[4] = info->mallocs;
720   if (flag == MAT_LOCAL) {
721     info->nz_used      = isend[0];
722     info->nz_allocated = isend[1];
723     info->nz_unneeded  = isend[2];
724     info->memory       = isend[3];
725     info->mallocs      = isend[4];
726   } else if (flag == MAT_GLOBAL_MAX) {
727     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
728 
729     info->nz_used      = irecv[0];
730     info->nz_allocated = irecv[1];
731     info->nz_unneeded  = irecv[2];
732     info->memory       = irecv[3];
733     info->mallocs      = irecv[4];
734   } else if (flag == MAT_GLOBAL_SUM) {
735     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
736 
737     info->nz_used      = irecv[0];
738     info->nz_allocated = irecv[1];
739     info->nz_unneeded  = irecv[2];
740     info->memory       = irecv[3];
741     info->mallocs      = irecv[4];
742   }
743   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
744   info->fill_ratio_needed = 0;
745   info->factor_mallocs    = 0;
746   PetscFunctionReturn(0);
747 }
748 
/* Handle options: some are forwarded to the local matrix, some are recorded on
   this object, and the rest are harmless no-ops for dense storage. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-structure options only matter to the inner matrix */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remember the orientation for MatSetValues and forward it locally */
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  /* structure hints are meaningless for dense storage */
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* suppresses stashing in MatSetValues_MPIDense */
    a->donotstash = flg;
    break;
  /* symmetry flags are accepted but not exploited */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
788 
/*
   MatDiagonalScale_MPIDense - compute A <- diag(ll) * A * diag(rr).

   Either vector may be NULL to skip that side.  Rows are local, so the left
   scaling needs no communication; the right scaling vector is distributed by
   columns and is gathered into the local work vector lvec via the star
   forest Mvctx before the columns are scaled.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    /* left scaling: multiply row i by ll[i] */
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;   /* walk across row i; column-major storage, stride lda */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    /* right scaling: gather rr into lvec, then multiply column i by r[i] */
    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); /* lazy scatter setup */
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;   /* column i is contiguous in memory */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}
834 
/*
   MatNorm_MPIDense - compute ||A|| for NORM_1, NORM_FROBENIUS, or NORM_INFINITY.

   Rows are distributed across processes while every process stores all
   columns of its local block, which dictates the reduction: Frobenius sums
   the squared entries globally; the 1-norm sums the per-column absolute
   values globally and then takes the max; the infinity-norm takes the max of
   the local row norms.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v    = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* uniprocessor: the local block is the whole matrix */
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): walks cmap->n*rmap->n contiguous entries, which assumes
         lda == rmap->n for the local block -- confirm no strided submatrix here */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* tmp[j] = local sum of |a_ij| over owned rows; reduce, then take max */
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}
882 
883 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
884 {
885   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
886   Mat            B;
887   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
888   PetscInt       j,i,lda;
889   PetscScalar    *v;
890 
891   PetscFunctionBegin;
892   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
893     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
894     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
895     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
896     PetscCall(MatMPIDenseSetPreallocation(B,NULL));
897   } else B = *matout;
898 
899   m    = a->A->rmap->n; n = a->A->cmap->n;
900   PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
901   PetscCall(MatDenseGetLDA(a->A,&lda));
902   PetscCall(PetscMalloc1(m,&rwork));
903   for (i=0; i<m; i++) rwork[i] = rstart + i;
904   for (j=0; j<n; j++) {
905     PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
906     v   += lda;
907   }
908   PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
909   PetscCall(PetscFree(rwork));
910   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
911   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
912   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
913     *matout = B;
914   } else {
915     PetscCall(MatHeaderMerge(A,&B));
916   }
917   PetscFunctionReturn(0);
918 }
919 
920 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
921 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
922 
923 PetscErrorCode MatSetUp_MPIDense(Mat A)
924 {
925   PetscFunctionBegin;
926   PetscCall(PetscLayoutSetUp(A->rmap));
927   PetscCall(PetscLayoutSetUp(A->cmap));
928   if (!A->preallocated) {
929     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
935 {
936   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
937 
938   PetscFunctionBegin;
939   PetscCall(MatAXPY(A->A,alpha,B->A,str));
940   PetscFunctionReturn(0);
941 }
942 
943 PetscErrorCode MatConjugate_MPIDense(Mat mat)
944 {
945   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
946 
947   PetscFunctionBegin;
948   PetscCall(MatConjugate(a->A));
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatRealPart_MPIDense(Mat A)
953 {
954   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
955 
956   PetscFunctionBegin;
957   PetscCall(MatRealPart(a->A));
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
962 {
963   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
964 
965   PetscFunctionBegin;
966   PetscCall(MatImaginaryPart(a->A));
967   PetscFunctionReturn(0);
968 }
969 
970 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
971 {
972   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
973 
974   PetscFunctionBegin;
975   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
976   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
977   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
978   PetscFunctionReturn(0);
979 }
980 
981 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
982 
983 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
984 {
985   PetscInt       i,m,n;
986   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
987   PetscReal      *work;
988 
989   PetscFunctionBegin;
990   PetscCall(MatGetSize(A,&m,&n));
991   PetscCall(PetscMalloc1(n,&work));
992   if (type == REDUCTION_MEAN_REALPART) {
993     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
994   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
995     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
996   } else {
997     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
998   }
999   if (type == NORM_2) {
1000     for (i=0; i<n; i++) work[i] *= work[i];
1001   }
1002   if (type == NORM_INFINITY) {
1003     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1004   } else {
1005     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1006   }
1007   PetscCall(PetscFree(work));
1008   if (type == NORM_2) {
1009     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1010   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1011     for (i=0; i<n; i++) reductions[i] /= m;
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 #if defined(PETSC_HAVE_CUDA)
/* Expose column col as a CUDA Vec that aliases the matrix's device storage
   (read-write access).  Must be paired with MatDenseRestoreColumnVec(). */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the shared column vector; its array is attached below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 1-based so 0 can mean "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1036 
1037 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1038 {
1039   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1040 
1041   PetscFunctionBegin;
1042   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1043   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1044   a->vecinuse = 0;
1045   PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1046   PetscCall(VecCUDAResetArray(a->cvec));
1047   *v   = NULL;
1048   PetscFunctionReturn(0);
1049 }
1050 
/* Expose column col as a read-only CUDA Vec aliasing the device storage; the
   vector is read-locked until MatDenseRestoreColumnVecRead() is called. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the shared column vector; its array is attached below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 1-based so 0 can mean "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec)); /* forbid writes through the alias */
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1071 
1072 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1073 {
1074   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1075 
1076   PetscFunctionBegin;
1077   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1078   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1079   a->vecinuse = 0;
1080   PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
1081   PetscCall(VecLockReadPop(a->cvec));
1082   PetscCall(VecCUDAResetArray(a->cvec));
1083   *v   = NULL;
1084   PetscFunctionReturn(0);
1085 }
1086 
/* Expose column col as a write-only CUDA Vec aliasing the device storage.
   Must be paired with MatDenseRestoreColumnVecWrite(). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the shared column vector; its array is attached below */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1; /* 1-based so 0 can mean "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1106 
1107 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1108 {
1109   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1110 
1111   PetscFunctionBegin;
1112   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1113   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1114   a->vecinuse = 0;
1115   PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1116   PetscCall(VecCUDAResetArray(a->cvec));
1117   *v   = NULL;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1122 {
1123   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1124 
1125   PetscFunctionBegin;
1126   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1127   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1128   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1133 {
1134   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1135 
1136   PetscFunctionBegin;
1137   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1138   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1139   PetscCall(MatDenseCUDAResetArray(l->A));
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1144 {
1145   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1146 
1147   PetscFunctionBegin;
1148   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1149   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1150   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1155 {
1156   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1157 
1158   PetscFunctionBegin;
1159   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1164 {
1165   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1173 {
1174   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1182 {
1183   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1184 
1185   PetscFunctionBegin;
1186   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1191 {
1192   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1193 
1194   PetscFunctionBegin;
1195   PetscCall(MatDenseCUDAGetArray(l->A,a));
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1200 {
1201   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1202 
1203   PetscFunctionBegin;
1204   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1209 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1210 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1211 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1212 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1213 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1214 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1215 
/* Bind the matrix to the CPU (bind = PETSC_TRUE) or allow device execution
   (bind = PETSC_FALSE).  The local block and the cached submatrix are bound
   too, and the column-vector accessors are re-composed so that subsequent
   MatDenseGetColumnVec*() calls produce vectors of the matching type. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* the cached helper vector/matrix may have been created as host objects
       while bound; destroy them so they are recreated as CUDA objects */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
  } else {
    /* bound to CPU: route the accessors to the host implementations */
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}
1257 
/* Preallocate the local storage of a MATMPIDENSECUDA matrix.

   d_data - optional device array to use as the local block's storage; pass
            NULL to have PETSc allocate it.

   Silently does nothing if A is not of type MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* the local block holds this process's rows and ALL columns */
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1280 #endif
1281 
1282 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1283 {
1284   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1285 
1286   PetscFunctionBegin;
1287   PetscCall(MatSetRandom(d->A,rctx));
1288   PetscFunctionReturn(0);
1289 }
1290 
/* A dense matrix stores every entry, so a diagonal entry is never missing;
   *missing is always PETSC_FALSE and d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1297 
1298 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1299 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1300 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1301 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1302 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1303 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1304 
1305 /* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE.  The numbered comments are the slot
   indices in struct _MatOps; NULL slots are unsupported for this type or
   handled by generic fallbacks. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};
1455 
/* Preallocate the local storage of a MATMPIDENSE matrix.

   data - optional host array to use as the local block's storage; pass NULL
          to have PETSc allocate it.

   Creates the sequential inner matrix on first call (this process's rows by
   ALL global columns) and picks SEQDENSE or SEQDENSECUDA to match the outer
   type. */
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1477 
/* Convert a parallel AIJ matrix to parallel dense format.
   Strategy: merge the local diagonal/off-diagonal AIJ parts into one SeqAIJ (C),
   densify it (B), then concatenate the per-rank dense blocks into the MPI result.
   reuse - MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, or MAT_INPLACE_MATRIX */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
  PetscCall(MatDestroy(&C));
  /* reuse only the caller's matrix for MAT_REUSE_MATRIX; otherwise build fresh */
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's guts with the dense result, preserving the outer Mat header */
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1496 
/* Convert a parallel dense matrix to parallel AIJ format.
   Mirrors MatConvert_MPIAIJ_MPIDense: sparsify the local dense block, then
   concatenate the per-rank SeqAIJ pieces into the MPI result.
   reuse - MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, or MAT_INPLACE_MATRIX */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            B,C;

  PetscFunctionBegin;
  /* C is a borrowed reference to the inner sequential block; do not destroy it */
  PetscCall(MatDenseGetLocalMatrix(A,&C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's guts with the AIJ result, preserving the outer Mat header */
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}
1514 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert a parallel dense matrix to Elemental format by inserting all locally
   owned rows (every global column) into a new/reused MATELEMENTAL matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* values are accumulated with ADD_VALUES below, so start from zero */
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    /* dense storage is column-major, so pass values column-oriented */
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  /* index sets: all global columns, and this rank's contiguous row range */
  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1554 
1555 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1556 {
1557   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1558 
1559   PetscFunctionBegin;
1560   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1565 {
1566   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1567 
1568   PetscFunctionBegin;
1569   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1570   PetscFunctionReturn(0);
1571 }
1572 
/* Stack the per-rank sequential dense matrices inmat (one per process in comm)
   into a single MATMPIDENSE *outmat. Each rank contributes its rows; n is the
   number of columns this rank owns in the result (PETSC_DECIDE to split). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  /* NOTE(review): nloc is queried but never used below — looks vestigial; confirm before removing */
  PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
    /* every rank only sets its own rows, so skip the off-process stash machinery */
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}
1600 
1601 #if defined(PETSC_HAVE_CUDA)
/* Convert a MATMPIDENSECUDA matrix back to plain MATMPIDENSE: strip the CUDA
   method table, restore standard vector type, and convert the inner block. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX falls through: *newmat already aliases M */

  B    = *newmat;
  /* bind to CPU first so subsequent operations use the host code paths */
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  /* unregister every CUDA-specific composed method (NULL removes it) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* inner sequential block follows the outer matrix back to host type */
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1641 
/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: install the CUDA method
   table, switch the default vector type, and convert the inner block to GPU. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }
  /* MAT_INPLACE_MATRIX falls through: *newmat already aliases M */

  B    = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  /* register the CUDA-specific implementations queried via PetscUseMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m    = (Mat_MPIDense*)(B->data);
  if (m->A) {
    /* inner block converted in place; host copy stays valid, hence OFFLOAD_BOTH */
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1684 #endif
1685 
/* Expose column col of A as an MPI Vec that aliases the matrix storage
   (read-write access). Only one column vector may be outstanding at a time;
   it must be returned with MatDenseRestoreColumnVec(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is placed per call below */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  /* column col starts at offset col*lda in the column-major local array */
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1704 
1705 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1706 {
1707   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1708 
1709   PetscFunctionBegin;
1710   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1711   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1712   a->vecinuse = 0;
1713   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1714   PetscCall(VecResetArray(a->cvec));
1715   *v   = NULL;
1716   PetscFunctionReturn(0);
1717 }
1718 
/* Expose column col of A as a read-only MPI Vec aliasing the matrix storage.
   The vector is read-locked; return it with MatDenseRestoreColumnVecRead(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector shared by all column accessors */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  /* forbid writes through the returned vector */
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1738 
1739 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1740 {
1741   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1742 
1743   PetscFunctionBegin;
1744   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1745   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1746   a->vecinuse = 0;
1747   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1748   PetscCall(VecLockReadPop(a->cvec));
1749   PetscCall(VecResetArray(a->cvec));
1750   *v   = NULL;
1751   PetscFunctionReturn(0);
1752 }
1753 
/* Expose column col of A as a write-only MPI Vec aliasing the matrix storage
   (previous contents need not be valid). Return it with
   MatDenseRestoreColumnVecWrite(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector shared by all column accessors */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;  /* +1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1772 
1773 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1774 {
1775   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1776 
1777   PetscFunctionBegin;
1778   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1779   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1780   a->vecinuse = 0;
1781   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1782   PetscCall(VecResetArray(a->cvec));
1783   *v   = NULL;
1784   PetscFunctionReturn(0);
1785 }
1786 
/* Expose columns [cbegin,cend) of A as a matrix a->cmat that aliases A's
   storage. Only one submatrix may be outstanding; it must be returned with
   MatDenseRestoreSubMatrix(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm     comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* lazily create the cached wrapper: same rows as A, cend-cbegin columns */
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached wrapper has a different width: rebuild only its column layout */
    PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
    PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* borrow the corresponding column range of the inner sequential block */
  PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;  /* +1 so cbegin == 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1819 
/* Return the submatrix obtained with MatDenseGetSubMatrix(): release the inner
   borrowed block and clear the in-use flag. *v is nulled on return. */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  /* give the borrowed sequential sub-block back to the inner matrix */
  PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
  *v   = NULL;
  PetscFunctionReturn(0);
}
1835 
1836 /*MC
1837    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1838 
1839    Options Database Keys:
1840 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1841 
1842   Level: beginner
1843 
1844 .seealso: MatCreateDense()
1845 
1846 M*/
/* Constructor for MATMPIDENSE: allocates the type-specific data, installs the
   operations table, and registers all composed methods (dense array/column/
   submatrix access, type conversions, matrix products, preallocation). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* composed methods dispatched through PetscUseMethod()/PetscTryMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}
1911 
1912 /*MC
1913    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1914 
1915    Options Database Keys:
1916 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1917 
1918   Level: beginner
1919 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1921 
1922 M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
/* Constructor for MATMPIDENSECUDA: builds a plain MPI dense matrix, then
   converts it in place to the CUDA type (after ensuring the device is up). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  /* in-place conversion installs the CUDA method table on B */
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
#endif
1934 
1935 /*MC
1936    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1937 
1938    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1939    and MATMPIDENSE otherwise.
1940 
1941    Options Database Keys:
1942 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1943 
1944   Level: beginner
1945 
1946 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1947 M*/
1948 
1949 /*MC
1950    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1951 
1952    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1953    and MATMPIDENSECUDA otherwise.
1954 
1955    Options Database Keys:
1956 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1957 
1958   Level: beginner
1959 
1960 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1961 M*/
1962 
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: silently a no-op on matrix types without this method */
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}
1992 
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  /* values changed: bump object state so cached results are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2024 
/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  /* values changed back: bump object state so cached results are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2049 
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  /* values changed: bump object state so cached results are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the host copy is now valid */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
2080 
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  /* values changed: bump object state so cached results are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the placed array lives on the device, so the GPU copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2110 
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
  /* values changed back: bump object state so cached results are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}
2135 
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  /* values changed: bump object state so cached results are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  /* the replacement array lives on the device, so the GPU copy is now valid */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2165 
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* Dispatch to the implementation registered by the CUDA dense matrix classes */
  PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
  /* Write access: bump the object state so cached quantities are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2192 
/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the device buffer is considered the up-to-date copy of the matrix data.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
  /* The caller may have written through the pointer: invalidate cached quantities */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* The GPU now holds the authoritative values */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2217 
/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  /* Read-only access: the object state is deliberately NOT increased here */
  PetscFunctionReturn(0);
}
2243 
2244 /*@C
2245    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2246 
2247    Not Collective
2248 
2249    Input Parameters:
2250 +  A - the matrix
2251 -  array - the GPU array in column major order
2252 
2253    Notes:
2254    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2255 
2256    Level: developer
2257 
2258 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2259 @*/
2260 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2261 {
2262   PetscFunctionBegin;
2263   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2264   PetscFunctionReturn(0);
2265 }
2266 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
  /* Read/write access: bump the object state so cached quantities are invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}
2293 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   After this call the device buffer is considered the up-to-date copy of the matrix data.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
  /* The caller may have written through the pointer: invalidate cached quantities */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  /* The GPU now holds the authoritative values */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2318 #endif
2319 
/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  /* MATDENSE resolves to the sequential or MPI variant depending on comm size */
  PetscCall(MatSetType(*A,MATDENSE));
  /* Both preallocation variants are invoked; only the one matching the actual
     matrix type takes effect (NOTE(review): presumably the mismatched call is a
     no-op via the composed-function dispatch -- confirm against the Set
     Preallocation implementations). */
  PetscCall(MatSeqDenseSetPreallocation(*A,data));
  PetscCall(MatMPIDenseSetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
2364 
2365 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   All processes must pass either a NULL data pointer or a non-NULL one
   (this is checked collectively below).

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  /* All ranks must agree on whether a user buffer is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  PetscCall(MatSetSizes(*A,m,n,M,N));
  /* MATDENSECUDA resolves to the sequential or MPI variant depending on comm size */
  PetscCall(MatSetType(*A,MATDENSECUDA));
  /* Only the variant matching the actual matrix type takes effect */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
2400 #endif
2401 
2402 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2403 {
2404   Mat            mat;
2405   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2406 
2407   PetscFunctionBegin;
2408   *newmat = NULL;
2409   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2410   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2411   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2412   a       = (Mat_MPIDense*)mat->data;
2413 
2414   mat->factortype   = A->factortype;
2415   mat->assembled    = PETSC_TRUE;
2416   mat->preallocated = PETSC_TRUE;
2417 
2418   mat->insertmode = NOT_SET_VALUES;
2419   a->donotstash   = oldmat->donotstash;
2420 
2421   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2422   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2423 
2424   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2425   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2426 
2427   *newmat = mat;
2428   PetscFunctionReturn(0);
2429 }
2430 
/* Load an MPI dense matrix from a viewer; supports binary and (when built) HDF5 viewers. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2456 
2457 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2458 {
2459   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2460   Mat            a,b;
2461   PetscBool      flg;
2462 
2463   PetscFunctionBegin;
2464   a    = matA->A;
2465   b    = matB->A;
2466   PetscCall(MatEqual(a,b,&flg));
2467   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2472 {
2473   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2474 
2475   PetscFunctionBegin;
2476   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2477   PetscCall(MatDestroy(&atb->atb));
2478   PetscCall(PetscFree(atb));
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2483 {
2484   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2485 
2486   PetscFunctionBegin;
2487   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2488   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2489   PetscCall(PetscFree(abt));
2490   PetscFunctionReturn(0);
2491 }
2492 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank forms the full (global-cols(A) x global-cols(B)) local contribution
   aseq^T * bseq, then the contributions are summed and distributed with
   MPI_Reduce_scatter so each rank keeps only its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  /* Buffers and counts were sized in the symbolic phase */
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq (reusing the intermediate matrix on repeat calls) */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf: for each destination rank, its block of rows
     in column-major order, so the reduce-scatter hands every rank exactly the
     column-major local block of C */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum all atbarray to local values of C
     (NOTE(review): assumes c->A stores columns contiguously, i.e. its leading
     dimension equals its local row count -- confirm for placed arrays) */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2537 
/* Symbolic phase of C = A^T * B for MPI dense matrices: sizes C, and allocates
   the send buffer and reduce-scatter counts reused by every numeric call.
   The fill argument is unused for dense products. */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  /* A^T*B requires A and B to have the same row distribution */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
  /* keep C dense; inherit A's type unless C is already a (CUDA) dense matrix */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C: a full global-size send buffer plus
     one reduce-scatter count per rank (owned rows of C times global cols) */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&atb));
  cM   = C->rmap->N;
  PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C,&ranges));
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2576 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sizes C and allocates
   communication buffers for the algorithm chosen via the product options
   ("allgatherv" or "cyclic"). The fill argument is unused for dense products. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: A*B^T requires matching column counts */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  /* alg 0 = allgatherv (default), alg 1 = cyclic */
  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  /* private tag for the point-to-point traffic of the cyclic algorithm */
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" -- send buffer for the local transposed block, receive buffer for all of B^T */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    /* displacements follow B's row distribution; A->cmap->N == B->cmap->N (checked above) */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2628 
/* Numeric phase of C = A * B^T, cyclic algorithm: the local block of B is
   passed around the ring of ranks; at each step every rank multiplies its
   local A block against the block currently held, filling the corresponding
   column panel of C. Communication overlaps the local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  /* leading dimensions of the local blocks, as BLAS ints */
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn   = B->rmap->n;
  /* If B's local block is already contiguous, send it directly; otherwise
     compact it (dropping the lda padding) into the first ping-pong buffer */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: send to the previous rank, receive from the next */
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T: fills the column panel of C owned by recvisfrom */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2713 
2714 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2715 {
2716   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2717   Mat_MatTransMultDense *abt;
2718   MPI_Comm              comm;
2719   PetscMPIInt           size;
2720   PetscScalar           *cv, *sendbuf, *recvbuf;
2721   const PetscScalar     *av,*bv;
2722   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2723   PetscScalar           _DOne=1.0,_DZero=0.0;
2724   PetscBLASInt          cm, cn, ck, alda, clda;
2725 
2726   PetscFunctionBegin;
2727   MatCheckProduct(C,3);
2728   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2729   abt  = (Mat_MatTransMultDense*)C->product->data;
2730   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2731   PetscCallMPI(MPI_Comm_size(comm,&size));
2732   PetscCall(MatDenseGetArrayRead(a->A,&av));
2733   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2734   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2735   PetscCall(MatDenseGetLDA(a->A,&i));
2736   PetscCall(PetscBLASIntCast(i,&alda));
2737   PetscCall(MatDenseGetLDA(b->A,&blda));
2738   PetscCall(MatDenseGetLDA(c->A,&i));
2739   PetscCall(PetscBLASIntCast(i,&clda));
2740   /* copy transpose of B into buf[0] */
2741   bn      = B->rmap->n;
2742   sendbuf = abt->buf[0];
2743   recvbuf = abt->buf[1];
2744   for (k = 0, j = 0; j < bn; j++) {
2745     for (i = 0; i < cK; i++, k++) {
2746       sendbuf[k] = bv[i * blda + j];
2747     }
2748   }
2749   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2750   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2751   PetscCall(PetscBLASIntCast(cK,&ck));
2752   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2753   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2754   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2755   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2756   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2757   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2758   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2759   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2764 {
2765   Mat_MatTransMultDense *abt;
2766 
2767   PetscFunctionBegin;
2768   MatCheckProduct(C,3);
2769   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2770   abt = (Mat_MatTransMultDense*)C->product->data;
2771   switch (abt->alg) {
2772   case 1:
2773     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2774     break;
2775   default:
2776     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2777     break;
2778   }
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2783 {
2784   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2785 
2786   PetscFunctionBegin;
2787   PetscCall(MatDestroy(&ab->Ce));
2788   PetscCall(MatDestroy(&ab->Ae));
2789   PetscCall(MatDestroy(&ab->Be));
2790   PetscCall(PetscFree(ab));
2791   PetscFunctionReturn(0);
2792 }
2793 
#if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B via Elemental: refresh the Elemental copies of A
   and B, multiply there, and convert the result back into the dense C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* Reuse the Elemental matrices created in the symbolic phase */
  PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
  /* Copy the Elemental product back into the existing dense C */
  PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
  PetscFunctionReturn(0);
}
2809 
2810 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2811 {
2812   Mat              Ae,Be,Ce;
2813   Mat_MatMultDense *ab;
2814 
2815   PetscFunctionBegin;
2816   MatCheckProduct(C,4);
2817   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2818   /* check local size of A and B */
2819   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2820     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2821   }
2822 
2823   /* create elemental matrices Ae and Be */
2824   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2825   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2826   PetscCall(MatSetType(Ae,MATELEMENTAL));
2827   PetscCall(MatSetUp(Ae));
2828   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2829 
2830   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2831   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2832   PetscCall(MatSetType(Be,MATELEMENTAL));
2833   PetscCall(MatSetUp(Be));
2834   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2835 
2836   /* compute symbolic Ce = Ae*Be */
2837   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2838   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2839 
2840   /* setup C */
2841   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2842   PetscCall(MatSetType(C,MATDENSE));
2843   PetscCall(MatSetUp(C));
2844 
2845   /* create data structure for reuse Cdense */
2846   PetscCall(PetscNew(&ab));
2847   ab->Ae = Ae;
2848   ab->Be = Be;
2849   ab->Ce = Ce;
2850 
2851   C->product->data    = ab;
2852   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2853   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2854   PetscFunctionReturn(0);
2855 }
2856 #endif
2857 /* ----------------------------------------------- */
2858 #if defined(PETSC_HAVE_ELEMENTAL)
2859 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2860 {
2861   PetscFunctionBegin;
2862   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2863   C->ops->productsymbolic = MatProductSymbolic_AB;
2864   PetscFunctionReturn(0);
2865 }
2866 #endif
2867 
2868 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2869 {
2870   Mat_Product *product = C->product;
2871   Mat         A = product->A,B=product->B;
2872 
2873   PetscFunctionBegin;
2874   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2875     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2876   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2877   C->ops->productsymbolic = MatProductSymbolic_AtB;
2878   PetscFunctionReturn(0);
2879 }
2880 
/* Configure the A*B^T product on C: pick the algorithm ("allgatherv" or
   "cyclic") from the product default and runtime options, then install the
   dense symbolic drivers. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg,"default",&flg));
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  /* Get runtime option; the option name depends on whether the old MatMatTransposeMult()
     API or the newer MatProduct API is being used */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  }
  /* flg is set by PetscOptionsEList only when the user supplied the option */
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2914 
/* Top-level MatProduct dispatcher for MPI dense matrices: route to the
   handler for the requested product type. AB is only available when PETSc is
   built with Elemental; unsupported types fall through and leave C untouched. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
2937