
/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that represents the locally owned portion of the operator. For a
              MATSEQDENSE matrix, returns the matrix itself.

    Input Parameter:
.      A - the sequential or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
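
/*
   Example (a minimal sketch added for illustration, not part of the original source):
   a caller can use MatDenseGetLocalMatrix() to operate directly on the locally owned
   rows. The sizes below are assumptions chosen only for the example.

     Mat      A, Aloc;
     PetscInt lda;

     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,NULL,&A));
     PetscCall(MatDenseGetLocalMatrix(A,&Aloc));   // Aloc is a MATSEQDENSE view, not a copy
     PetscCall(MatDenseGetLDA(Aloc,&lda));         // leading dimension of the local array
     // ... work with the local rows through Aloc ...
     PetscCall(MatDestroy(&A));                    // Aloc is owned by A; do not destroy it
*/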

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows are supported");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows are supported");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices with congruent row and column layouts are supported");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a   = B;
    /* composing B above took a reference, so destroying the local handle keeps the cached block alive */
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK. Currently only a submatrix with the same row partitioning as the
     original matrix is supported! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i    = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArrayWrite(b, &bb));
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArrayWrite(b, &bb));
  }
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}
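
/*
   Example (a minimal sketch added for illustration, not part of the original source):
   zeroing rows of an MPIDENSE matrix while fixing the right-hand side so that the
   zeroed rows stay consistent with the known solution values in x. The row indices
   are global; the values below are assumptions chosen only for the example.

     PetscInt rows[] = {0, 5};

     // Zero the two rows, put 1.0 on their diagonal, and set b[row] = 1.0*x[row]
     PetscCall(MatZeroRows(A,2,rows,1.0,x,b));
*/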

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

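/*
   Note (added commentary, not part of the original source): the parallel multiply
   routines below all follow the same pattern. Each process stores its A->rmap->n
   local rows against all N global columns, so y = A*x only requires the full input
   vector on every process: the Mvctx PetscSF broadcasts x into the local work
   vector lvec (in host or device memory, as reported by the memtype), and the
   product is then a purely local dense multiply. The transpose products reverse
   the pattern: a local transpose multiply into lvec is followed by an SF
   reduction (summation) into the result vector.
*/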
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  radd = A->rmap->rstart*lda; /* offset of the diagonal block within the (column-major) local array; must use lda, not the local row count, in case the two differ */
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix row by row; this is not the most efficient means,
       but it is quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_Dense_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

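/*
   Note (added commentary, not part of the original source): left scaling by ll
   touches only locally owned rows and needs no communication, while right scaling
   requires every process to see the full vector rr of global column scalings, so
   rr is first broadcast into the work vector lvec through the Mvctx PetscSF
   before the columns of the local array are scaled in place.
*/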
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v    = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v   += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

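/*
   Note (added commentary, not part of the original source): column reductions are
   assembled from per-process partial results. Means are accumulated as sums and
   divided by the global row count afterwards, 2-norms are accumulated as sums of
   squares and square-rooted at the end, and infinity norms are combined with a
   max reduction instead of a sum.
*/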
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt       i,m,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
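
/*
   Example (a minimal sketch added for illustration, not part of the original source):
   preallocating an MPIDENSECUDA matrix. Passing NULL lets PETSc allocate the device
   storage itself; the sizes m and N are assumptions chosen only for the example.

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N));
     PetscCall(MatSetType(A,MATMPIDENSECUDA));
     PetscCall(MatMPIDenseCUDASetPreallocation(A,NULL));
*/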
#endif

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
1394                                         NULL,
1395                                 /* 89*/ NULL,
1396                                         NULL,
1397                                         NULL,
1398                                         NULL,
1399                                         NULL,
1400                                 /* 94*/ NULL,
1401                                         NULL,
1402                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1403                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1404                                         NULL,
1405                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1406                                         NULL,
1407                                         NULL,
1408                                         MatConjugate_MPIDense,
1409                                         NULL,
1410                                 /*104*/ NULL,
1411                                         MatRealPart_MPIDense,
1412                                         MatImaginaryPart_MPIDense,
1413                                         NULL,
1414                                         NULL,
1415                                 /*109*/ NULL,
1416                                         NULL,
1417                                         NULL,
1418                                         MatGetColumnVector_MPIDense,
1419                                         MatMissingDiagonal_MPIDense,
1420                                 /*114*/ NULL,
1421                                         NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                 /*119*/ NULL,
1426                                         NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /*124*/ NULL,
1431                                         MatGetColumnReductions_MPIDense,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                 /*129*/ NULL,
1436                                         NULL,
1437                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1438                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1439                                         NULL,
1440                                 /*134*/ NULL,
1441                                         NULL,
1442                                         NULL,
1443                                         NULL,
1444                                         NULL,
1445                                 /*139*/ NULL,
1446                                         NULL,
1447                                         NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1451                                 /*145*/ NULL,
1452                                         NULL,
1453                                         NULL
1454 };
1455 
1456 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1457 {
1458   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1459   PetscBool      iscuda;
1460 
1461   PetscFunctionBegin;
1462   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1463   PetscCall(PetscLayoutSetUp(mat->rmap));
1464   PetscCall(PetscLayoutSetUp(mat->cmap));
1465   if (!a->A) {
1466     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1467     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1468     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1469   }
1470   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1471   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1472   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1473   mat->preallocated = PETSC_TRUE;
1474   mat->assembled = PETSC_TRUE;
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1479 {
1480   Mat            B,C;
1481 
1482   PetscFunctionBegin;
1483   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1484   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1485   PetscCall(MatDestroy(&C));
1486   if (reuse == MAT_REUSE_MATRIX) {
1487     C = *newmat;
1488   } else C = NULL;
1489   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1490   PetscCall(MatDestroy(&B));
1491   if (reuse == MAT_INPLACE_MATRIX) {
1492     PetscCall(MatHeaderReplace(A,&C));
1493   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1498 {
1499   Mat            B,C;
1500 
1501   PetscFunctionBegin;
1502   PetscCall(MatDenseGetLocalMatrix(A,&C));
1503   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1504   if (reuse == MAT_REUSE_MATRIX) {
1505     C = *newmat;
1506   } else C = NULL;
1507   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1508   PetscCall(MatDestroy(&B));
1509   if (reuse == MAT_INPLACE_MATRIX) {
1510     PetscCall(MatHeaderReplace(A,&C));
1511   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #if defined(PETSC_HAVE_ELEMENTAL)
1516 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1517 {
1518   Mat            mat_elemental;
1519   PetscScalar    *v;
1520   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1521 
1522   PetscFunctionBegin;
1523   if (reuse == MAT_REUSE_MATRIX) {
1524     mat_elemental = *newmat;
1525     PetscCall(MatZeroEntries(*newmat));
1526   } else {
1527     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1528     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1529     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1530     PetscCall(MatSetUp(mat_elemental));
1531     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1532   }
1533 
1534   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1535   for (i=0; i<N; i++) cols[i] = i;
1536   for (i=0; i<m; i++) rows[i] = rstart + i;
1537 
1538   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1539   PetscCall(MatDenseGetArray(A,&v));
1540   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1541   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1542   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1543   PetscCall(MatDenseRestoreArray(A,&v));
1544   PetscCall(PetscFree2(rows,cols));
1545 
1546   if (reuse == MAT_INPLACE_MATRIX) {
1547     PetscCall(MatHeaderReplace(A,&mat_elemental));
1548   } else {
1549     *newmat = mat_elemental;
1550   }
1551   PetscFunctionReturn(0);
1552 }
1553 #endif
1554 
1555 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1556 {
1557   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1558 
1559   PetscFunctionBegin;
1560   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1565 {
1566   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1567 
1568   PetscFunctionBegin;
1569   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1574 {
1575   Mat_MPIDense   *mat;
1576   PetscInt       m,nloc,N;
1577 
1578   PetscFunctionBegin;
1579   PetscCall(MatGetSize(inmat,&m,&N));
1580   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1581   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1582     PetscInt sum;
1583 
1584     if (n == PETSC_DECIDE) {
1585       PetscCall(PetscSplitOwnership(comm,&n,&N));
1586     }
1587     /* Check sum(n) = N */
1588     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1589     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1590 
1591     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1592     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1593   }
1594 
1595   /* numeric phase */
1596   mat = (Mat_MPIDense*)(*outmat)->data;
1597   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #if defined(PETSC_HAVE_CUDA)
1602 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1603 {
1604   Mat            B;
1605   Mat_MPIDense   *m;
1606 
1607   PetscFunctionBegin;
1608   if (reuse == MAT_INITIAL_MATRIX) {
1609     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1610   } else if (reuse == MAT_REUSE_MATRIX) {
1611     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1612   }
1613 
1614   B    = *newmat;
1615   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1616   PetscCall(PetscFree(B->defaultvectype));
1617   PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1618   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1619   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1620   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1621   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1622   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1623   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1624   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1625   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1626   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1627   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1628   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1629   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1630   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1631   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1632   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1633   m    = (Mat_MPIDense*)(B)->data;
1634   if (m->A) {
1635     PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1636   }
1637   B->ops->bindtocpu = NULL;
1638   B->offloadmask    = PETSC_OFFLOAD_CPU;
1639   PetscFunctionReturn(0);
1640 }
1641 
1642 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1643 {
1644   Mat            B;
1645   Mat_MPIDense   *m;
1646 
1647   PetscFunctionBegin;
1648   if (reuse == MAT_INITIAL_MATRIX) {
1649     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1650   } else if (reuse == MAT_REUSE_MATRIX) {
1651     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1652   }
1653 
1654   B    = *newmat;
1655   PetscCall(PetscFree(B->defaultvectype));
1656   PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1657   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1658   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1659   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1660   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1661   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1662   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1663   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1664   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1666   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1667   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1669   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1670   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1672   m    = (Mat_MPIDense*)(B->data);
1673   if (m->A) {
1674     PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1675     B->offloadmask = PETSC_OFFLOAD_BOTH;
1676   } else {
1677     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1678   }
1679   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1680 
1681   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1682   PetscFunctionReturn(0);
1683 }
1684 #endif
1685 
1686 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1687 {
1688   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1689   PetscInt       lda;
1690 
1691   PetscFunctionBegin;
1692   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1693   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1694   if (!a->cvec) {
1695     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1696   }
1697   a->vecinuse = col + 1;
1698   PetscCall(MatDenseGetLDA(a->A,&lda));
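  /* expose column 'col' of the local array as the work vector without copying:
     in column-major storage with leading dimension lda, column col starts at offset col*lda */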
1699   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1700   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1701   *v   = a->cvec;
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1706 {
1707   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1708 
1709   PetscFunctionBegin;
1710   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1711   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1712   a->vecinuse = 0;
1713   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1714   PetscCall(VecResetArray(a->cvec));
1715   *v   = NULL;
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1720 {
1721   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1722   PetscInt       lda;
1723 
1724   PetscFunctionBegin;
1725   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1726   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1727   if (!a->cvec) {
1728     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1729   }
1730   a->vecinuse = col + 1;
1731   PetscCall(MatDenseGetLDA(a->A,&lda));
1732   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1733   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1734   PetscCall(VecLockReadPush(a->cvec));
1735   *v   = a->cvec;
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1740 {
1741   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1742 
1743   PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
1745   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1746   a->vecinuse = 0;
1747   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1748   PetscCall(VecLockReadPop(a->cvec));
1749   PetscCall(VecResetArray(a->cvec));
1750   *v   = NULL;
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1755 {
1756   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1757   PetscInt       lda;
1758 
1759   PetscFunctionBegin;
1760   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1761   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1762   if (!a->cvec) {
1763     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1764   }
1765   a->vecinuse = col + 1;
1766   PetscCall(MatDenseGetLDA(a->A,&lda));
1767   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1768   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1769   *v   = a->cvec;
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1774 {
1775   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1776 
1777   PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
1779   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1780   a->vecinuse = 0;
1781   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1782   PetscCall(VecResetArray(a->cvec));
1783   *v   = NULL;
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1788 {
1789   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1790   Mat_MPIDense *c;
1791   MPI_Comm     comm;
1792 
1793   PetscFunctionBegin;
1794   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1795   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1796   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1797   if (!a->cmat) {
1798     PetscCall(MatCreate(comm,&a->cmat));
1799     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1800     PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
1801     PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1802     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1803     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1804   } else if (cend-cbegin != a->cmat->cmap->N) {
1805     PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1806     PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
1807     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1808     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1809   }
1810   c = (Mat_MPIDense*)a->cmat->data;
1811   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1812   PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
1813   a->cmat->preallocated = PETSC_TRUE;
1814   a->cmat->assembled = PETSC_TRUE;
1815   a->matinuse = cbegin + 1;
1816   *v = a->cmat;
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1821 {
1822   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1823   Mat_MPIDense *c;
1824 
1825   PetscFunctionBegin;
1826   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1827   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1828   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1829   a->matinuse = 0;
1830   c = (Mat_MPIDense*)a->cmat->data;
1831   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1832   *v = NULL;
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 /*MC
1837    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1838 
1839    Options Database Keys:
1840 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1841 
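   Example usage, a minimal sketch (PetscCall() error checking omitted; M and N stand for the
   desired global sizes):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPIDENSE);
   MatSetUp(A);
.ve
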
1842   Level: beginner
1843 
1844 .seealso: `MatCreateDense()`
1845 
1846 M*/
1847 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1848 {
1849   Mat_MPIDense   *a;
1850 
1851   PetscFunctionBegin;
1852   PetscCall(PetscNewLog(mat,&a));
1853   mat->data = (void*)a;
1854   PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1855 
1856   mat->insertmode = NOT_SET_VALUES;
1857 
  /* build cache for off-process entries formed during assembly */
1859   a->donotstash = PETSC_FALSE;
1860 
1861   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1862 
  /* data used for matrix-vector multiplication */
1864   a->lvec        = NULL;
1865   a->Mvctx       = NULL;
1866   a->roworiented = PETSC_TRUE;
1867 
1868   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1869   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1870   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1871   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1872   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1873   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1874   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1875   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1876   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1877   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1878   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1879   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1880   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1881   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1882   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1883   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1884   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1885   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1886   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1887   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1888   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1889 #if defined(PETSC_HAVE_ELEMENTAL)
1890   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1891 #endif
1892 #if defined(PETSC_HAVE_SCALAPACK)
1893   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1894 #endif
1895 #if defined(PETSC_HAVE_CUDA)
1896   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1897 #endif
1898   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1899   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1900   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1901 #if defined(PETSC_HAVE_CUDA)
1902   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1903   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1904 #endif
1905 
1906   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1907   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1908   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 /*MC
1913    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1914 
1915    Options Database Keys:
1916 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1917 
1918   Level: beginner
1919 
.seealso: `MATMPIDENSE`, `MATDENSECUDA`, `MatCreateDenseCUDA()`
1921 
1922 M*/
1923 #if defined(PETSC_HAVE_CUDA)
1924 #include <petsc/private/deviceimpl.h>
1925 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1926 {
1927   PetscFunctionBegin;
1928   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
1929   PetscCall(MatCreate_MPIDense(B));
1930   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
1931   PetscFunctionReturn(0);
1932 }
1933 #endif
1934 
1935 /*MC
1936    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1937 
1938    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1939    and MATMPIDENSE otherwise.
1940 
1941    Options Database Keys:
1942 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1943 
1944   Level: beginner
1945 
.seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
1947 M*/
1948 
1949 /*MC
1950    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1951 
1952    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1953    and MATMPIDENSECUDA otherwise.
1954 
1955    Options Database Keys:
1956 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1957 
1958   Level: beginner
1959 
.seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
1961 M*/
1962 
1963 /*@C
1964    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1965 
1966    Collective
1967 
1968    Input Parameters:
+  B - the matrix
1970 -  data - optional location of matrix data.  Set data=NULL for PETSc
1971    to control all matrix memory allocation.
1972 
1973    Notes:
1974    The dense format is fully compatible with standard Fortran 77
1975    storage by columns.
1976 
1977    The data input variable is intended primarily for Fortran programmers
1978    who wish to allocate their own matrix memory space.  Most users should
1979    set data=NULL.
1980 
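   Example usage, a minimal sketch in which PETSc manages the storage (B is assumed to already
   have its sizes set and type MATMPIDENSE):
.vb
   MatMPIDenseSetPreallocation(B,NULL);  /* PETSc allocates the local column-major array */
.ve
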
1981    Level: intermediate
1982 
1983 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
1984 @*/
1985 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1986 {
1987   PetscFunctionBegin;
1988   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1989   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 /*@
1994    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1995    array provided by the user. This is useful to avoid copying an array
1996    into a matrix
1997 
1998    Not Collective
1999 
2000    Input Parameters:
2001 +  mat - the matrix
2002 -  array - the array in column major order
2003 
2004    Notes:
2005    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2006    freed when the matrix is destroyed.
2007 
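   Example usage, a sketch assuming mat has m local rows and N global columns, so the local
   storage holds m*N entries in column major order:
.vb
   PetscScalar *myarray;
   PetscMalloc1(m*N,&myarray);       /* user-owned storage */
   MatDensePlaceArray(mat,myarray);  /* mat now operates on myarray */
   /* ... use mat ... */
   MatDenseResetArray(mat);          /* mat returns to its original array */
   PetscFree(myarray);               /* the user still owns myarray */
.ve
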
2008    Level: developer
2009 
2010 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2011 
2012 @*/
2013 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2014 {
2015   PetscFunctionBegin;
2016   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2017   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2018   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2019 #if defined(PETSC_HAVE_CUDA)
2020   mat->offloadmask = PETSC_OFFLOAD_CPU;
2021 #endif
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 /*@
   MatDenseResetArray - Resets the matrix array to the array it held before the call to MatDensePlaceArray()
2027 
2028    Not Collective
2029 
   Input Parameter:
2031 .  mat - the matrix
2032 
2033    Notes:
2034    You can only call this after a call to MatDensePlaceArray()
2035 
2036    Level: developer
2037 
2038 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`
2039 
2040 @*/
2041 PetscErrorCode  MatDenseResetArray(Mat mat)
2042 {
2043   PetscFunctionBegin;
2044   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2045   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
2046   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 /*@
2051    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2052    array provided by the user. This is useful to avoid copying an array
2053    into a matrix
2054 
2055    Not Collective
2056 
2057    Input Parameters:
2058 +  mat - the matrix
2059 -  array - the array in column major order
2060 
2061    Notes:
2062    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2063    freed by the user. It will be freed when the matrix is destroyed.
2064 
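   Example usage, a sketch assuming m local rows and N global columns:
.vb
   PetscScalar *newarray;
   PetscMalloc1(m*N,&newarray);        /* MUST come from PetscMalloc() */
   /* ... fill newarray in column major order ... */
   MatDenseReplaceArray(mat,newarray); /* mat takes ownership; do not PetscFree(newarray) */
.ve
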
2065    Level: developer
2066 
2067 .seealso: `MatDenseGetArray()`, `VecReplaceArray()`
2068 @*/
2069 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2070 {
2071   PetscFunctionBegin;
2072   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2073   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2074   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2075 #if defined(PETSC_HAVE_CUDA)
2076   mat->offloadmask = PETSC_OFFLOAD_CPU;
2077 #endif
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 #if defined(PETSC_HAVE_CUDA)
2082 /*@C
2083    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2084    array provided by the user. This is useful to avoid copying an array
2085    into a matrix
2086 
2087    Not Collective
2088 
2089    Input Parameters:
2090 +  mat - the matrix
2091 -  array - the array in column major order
2092 
2093    Notes:
2094    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2095    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2096 
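   Example usage, a sketch assuming m local rows and N global columns (CUDA error checking
   omitted):
.vb
   PetscScalar *d_array;
   cudaMalloc((void**)&d_array,m*N*sizeof(PetscScalar));
   MatDenseCUDAPlaceArray(mat,d_array);  /* mat now uses the user GPU buffer */
   /* ... use mat ... */
   MatDenseCUDAResetArray(mat);          /* mat returns to its original GPU array */
   cudaFree(d_array);                    /* the user still owns d_array */
.ve
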
2097    Level: developer
2098 
2099 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
2100 @*/
2101 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2102 {
2103   PetscFunctionBegin;
2104   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2105   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2106   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2107   mat->offloadmask = PETSC_OFFLOAD_GPU;
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the GPU array it held before the call to MatDenseCUDAPlaceArray()
2113 
2114    Not Collective
2115 
   Input Parameter:
2117 .  mat - the matrix
2118 
2119    Notes:
2120    You can only call this after a call to MatDenseCUDAPlaceArray()
2121 
2122    Level: developer
2123 
2124 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`
2125 
2126 @*/
2127 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2128 {
2129   PetscFunctionBegin;
2130   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2131   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2132   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 /*@C
2137    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2138    array provided by the user. This is useful to avoid copying an array
2139    into a matrix
2140 
2141    Not Collective
2142 
2143    Input Parameters:
2144 +  mat - the matrix
2145 -  array - the array in column major order
2146 
2147    Notes:
2148    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2149    The memory passed in CANNOT be freed by the user. It will be freed
2150    when the matrix is destroyed. The array should respect the matrix leading dimension.
2151 
2152    Level: developer
2153 
2154 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
2155 @*/
2156 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2157 {
2158   PetscFunctionBegin;
2159   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2160   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2161   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2162   mat->offloadmask = PETSC_OFFLOAD_GPU;
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 /*@C
2167    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2168 
2169    Not Collective
2170 
   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order
2176 
2177    Notes:
2178    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2179 
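   Example usage, a sketch that zeros the GPU storage; for simplicity it assumes the buffer is
   contiguous with m local rows and N global columns, while a careful code would account for
   MatDenseGetLDA():
.vb
   PetscScalar *d_a;
   MatDenseCUDAGetArrayWrite(A,&d_a);
   cudaMemset(d_a,0,m*N*sizeof(PetscScalar));  /* fill with zeros on the device */
   MatDenseCUDARestoreArrayWrite(A,&d_a);
.ve
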
2180    Level: developer
2181 
2182 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
2183 @*/
2184 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2185 {
2186   PetscFunctionBegin;
2187   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2188   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2189   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /*@C
2194    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2195 
2196    Not Collective
2197 
2198    Input Parameters:
2199 +  A - the matrix
2200 -  array - the GPU array in column major order
2201 
   Notes:
   After this call the GPU holds the up-to-date version of the matrix data.

2204    Level: developer
2205 
2206 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2207 @*/
2208 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2209 {
2210   PetscFunctionBegin;
2211   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2212   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2213   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2214   A->offloadmask = PETSC_OFFLOAD_GPU;
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 /*@C
2219    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2220 
2221    Not Collective
2222 
   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order
2228 
2229    Notes:
2230    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2231 
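   Example usage, a sketch that copies the first entry of the local GPU array to the host for
   inspection:
.vb
   const PetscScalar *d_a;
   PetscScalar       first;
   MatDenseCUDAGetArrayRead(A,&d_a);
   cudaMemcpy(&first,d_a,sizeof(PetscScalar),cudaMemcpyDeviceToHost);
   MatDenseCUDARestoreArrayRead(A,&d_a);
.ve
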
2232    Level: developer
2233 
2234 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2235 @*/
2236 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2237 {
2238   PetscFunctionBegin;
2239   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2240   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 /*@C
2245    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2246 
2247    Not Collective
2248 
2249    Input Parameters:
2250 +  A - the matrix
2251 -  array - the GPU array in column major order
2252 
2253    Notes:
   Since the access was read-only, the matrix is not marked as modified: the object state and GPU offload flag are unchanged.
2255 
2256    Level: developer
2257 
2258 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
2259 @*/
2260 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2261 {
2262   PetscFunctionBegin;
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 /*@C
2268    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2269 
2270    Not Collective
2271 
   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order
2277 
2278    Notes:
2279    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2280 
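   Example usage, a sketch that duplicates column 0 into column 1 on the device (assumes at
   least two columns and m local rows):
.vb
   PetscScalar *d_a;
   PetscInt    lda;
   MatDenseGetLDA(A,&lda);
   MatDenseCUDAGetArray(A,&d_a);
   cudaMemcpy(d_a+lda,d_a,m*sizeof(PetscScalar),cudaMemcpyDeviceToDevice);
   MatDenseCUDARestoreArray(A,&d_a);
.ve
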
2281    Level: developer
2282 
2283 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
2284 @*/
2285 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2286 {
2287   PetscFunctionBegin;
2288   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2289   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2290   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 /*@C
2295    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2296 
2297    Not Collective
2298 
2299    Input Parameters:
2300 +  A - the matrix
2301 -  array - the GPU array in column major order
2302 
   Notes:
   After this call the GPU holds the up-to-date version of the matrix data.

2305    Level: developer
2306 
2307 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
2308 @*/
2309 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2310 {
2311   PetscFunctionBegin;
2312   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2313   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2314   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2315   A->offloadmask = PETSC_OFFLOAD_GPU;
2316   PetscFunctionReturn(0);
2317 }
2318 #endif
2319 
2320 /*@C
2321    MatCreateDense - Creates a matrix in dense format.
2322 
2323    Collective
2324 
2325    Input Parameters:
2326 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2331 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2332    to control all matrix memory allocation.
2333 
2334    Output Parameter:
2335 .  A - the matrix
2336 
2337    Notes:
2338    The dense format is fully compatible with standard Fortran 77
2339    storage by columns.
2340    Note that, although local portions of the matrix are stored in column-major
2341    order, the matrix is partitioned across MPI ranks by row.
2342 
2343    The data input variable is intended primarily for Fortran programmers
2344    who wish to allocate their own matrix memory space.  Most users should
2345    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2346 
2347    The user MUST specify either the local or global matrix dimensions
2348    (possibly both).
2349 
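   Example usage, a minimal sketch that creates a distributed dense matrix and fills the locally
   owned rows (PetscCall() error checking omitted; M and N stand for the desired global sizes):
.vb
   Mat      A;
   PetscInt i,j,rstart,rend;
   MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
   MatGetOwnershipRange(A,&rstart,&rend);
   for (i=rstart; i<rend; i++) {
     for (j=0; j<N; j++) {
       MatSetValue(A,i,j,(PetscScalar)(i+j),INSERT_VALUES);
     }
   }
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
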
2350    Level: intermediate
2351 
2352 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
2353 @*/
2354 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2355 {
2356   PetscFunctionBegin;
2357   PetscCall(MatCreate(comm,A));
2358   PetscCall(MatSetSizes(*A,m,n,M,N));
2359   PetscCall(MatSetType(*A,MATDENSE));
2360   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2361   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #if defined(PETSC_HAVE_CUDA)
2366 /*@C
2367    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2368 
2369    Collective
2370 
2371    Input Parameters:
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2376 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2377 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2378    to control matrix memory allocation.
2379 
2380    Output Parameter:
2381 .  A - the matrix
2382 
2383    Notes:
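   If data is not NULL, it must be a pointer to device memory, typically allocated with
   cudaMalloc(), that is large enough to hold the local portion of the matrix.

   Example usage, a minimal sketch with PETSc-managed GPU storage (M and N stand for the
   desired global sizes):
.vb
   Mat A;
   MatCreateDenseCUDA(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A);
.ve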
2384 
2385    Level: intermediate
2386 
2387 .seealso: `MatCreate()`, `MatCreateDense()`
2388 @*/
2389 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2390 {
2391   PetscFunctionBegin;
2392   PetscCall(MatCreate(comm,A));
2393   PetscValidLogicalCollectiveBool(*A,!!data,6);
2394   PetscCall(MatSetSizes(*A,m,n,M,N));
2395   PetscCall(MatSetType(*A,MATDENSECUDA));
2396   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2397   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2398   PetscFunctionReturn(0);
2399 }
2400 #endif
2401 
2402 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2403 {
2404   Mat            mat;
2405   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2406 
2407   PetscFunctionBegin;
2408   *newmat = NULL;
2409   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2410   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2411   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2412   a       = (Mat_MPIDense*)mat->data;
2413 
2414   mat->factortype   = A->factortype;
2415   mat->assembled    = PETSC_TRUE;
2416   mat->preallocated = PETSC_TRUE;
2417 
2418   mat->insertmode = NOT_SET_VALUES;
2419   a->donotstash   = oldmat->donotstash;
2420 
2421   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2422   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2423 
2424   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2425   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2426 
2427   *newmat = mat;
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2432 {
2433   PetscBool      isbinary;
2434 #if defined(PETSC_HAVE_HDF5)
2435   PetscBool      ishdf5;
2436 #endif
2437 
2438   PetscFunctionBegin;
2439   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2440   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2441   /* force binary viewer to load .info file if it has not yet done so */
2442   PetscCall(PetscViewerSetUp(viewer));
2443   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2444 #if defined(PETSC_HAVE_HDF5)
2445   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2446 #endif
2447   if (isbinary) {
2448     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2449 #if defined(PETSC_HAVE_HDF5)
2450   } else if (ishdf5) {
2451     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2452 #endif
2453   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2458 {
2459   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2460   Mat            a,b;
2461 
2462   PetscFunctionBegin;
2463   a    = matA->A;
2464   b    = matB->A;
2465   PetscCall(MatEqual(a,b,flag));
2466   PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2471 {
2472   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2473 
2474   PetscFunctionBegin;
2475   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2476   PetscCall(MatDestroy(&atb->atb));
2477   PetscCall(PetscFree(atb));
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2482 {
2483   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2484 
2485   PetscFunctionBegin;
2486   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2487   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2488   PetscCall(PetscFree(abt));
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2493 {
2494   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2495   Mat_TransMatMultDense *atb;
2496   MPI_Comm              comm;
2497   PetscMPIInt           size,*recvcounts;
2498   PetscScalar           *carray,*sendbuf;
2499   const PetscScalar     *atbarray;
2500   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2501   const PetscInt        *ranges;
2502 
2503   PetscFunctionBegin;
2504   MatCheckProduct(C,3);
2505   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2506   atb = (Mat_TransMatMultDense *)C->product->data;
2507   recvcounts = atb->recvcounts;
2508   sendbuf = atb->sendbuf;
2509 
2510   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2511   PetscCallMPI(MPI_Comm_size(comm,&size));
2512 
2513   /* compute atbarray = aseq^T * bseq */
2514   PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2515 
2516   PetscCall(MatGetOwnershipRanges(C,&ranges));
2517 
2518   /* arrange atbarray into sendbuf */
2519   PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
2520   PetscCall(MatDenseGetLDA(atb->atb,&lda));
2521   for (proc=0, k=0; proc<size; proc++) {
2522     for (j=0; j<cN; j++) {
2523       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2524     }
2525   }
2526   PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2527 
2528   /* sum all atbarray to local values of C */
2529   PetscCall(MatDenseGetArrayWrite(c->A,&carray));
2530   PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2531   PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
2532   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2533   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2538 {
2539   MPI_Comm              comm;
2540   PetscMPIInt           size;
2541   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2542   Mat_TransMatMultDense *atb;
2543   PetscBool             cisdense;
2544   PetscInt              i;
2545   const PetscInt        *ranges;
2546 
2547   PetscFunctionBegin;
  MatCheckProduct(C,4); /* C is the fourth argument of this symbolic routine */
2549   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2550   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2551   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2553   }
2554 
2555   /* create matrix product C */
2556   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2557   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2558   if (!cisdense) {
2559     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2560   }
2561   PetscCall(MatSetUp(C));
2562 
2563   /* create data structure for reuse C */
2564   PetscCallMPI(MPI_Comm_size(comm,&size));
2565   PetscCall(PetscNew(&atb));
2566   cM   = C->rmap->N;
2567   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2568   PetscCall(MatGetOwnershipRanges(C,&ranges));
2569   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2570 
2571   C->product->data    = atb;
2572   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2573   PetscFunctionReturn(0);
2574 }

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local sizes of A and B */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg  = flg ? 0 : 1;

  /* set up matrix product C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create data structure for reusing C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
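
/*
   Worked sizing example for the buffers above (illustrative numbers, not from
   the source): size = 4 ranks, B is 10 x 6 with row splits {3,3,2,2}, and
   A->cmap->N = 6.

     cyclic:     maxRows = 3, so bufsiz = 6*3 = 18 scalars per ping-pong buffer
     allgatherv: buf[0] = 3*6 = 18 on a rank owning 3 rows (local B transposed),
                 buf[1] = 10*6 = 60 (all of B^T),
                 recvdispls = {0,18,36,48,60}, recvcounts = {18,18,12,12}
*/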

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn   = B->rmap->n;
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving into and sending from the buffers, so they can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* the process the next recvbuf will originate from */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
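
/*
   Ring schedule used above (sketch): at step i each rank runs a GEMM against
   the transposed B block currently held in sendbuf (originating on rank
   recvisfrom), filling the corresponding column slab of its local C, while
   that block is simultaneously forwarded to rank-1 and the next block arrives
   from rank+1 (mod size). After size steps every rank has multiplied against
   every row block of B, at the cost of size-1 send/receive pairs per rank,
   overlapped with computation.
*/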

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  /* copy the transpose of the local rows of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bv[i * blda + j];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
  if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
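
/*
   Buffer layout used above (sketch): sendbuf holds the transpose of the local
   rows of B in column-major order (cK x bn), so after MPI_Allgatherv recvbuf
   contains all of B^T as a cK x N column-major array with leading dimension
   ck. The final product is then a single "N","N" GEMM, Cloc = Aloc * B^T,
   with no further data movement.
*/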

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
    break;
  default:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
    break;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ab->Ce));
  PetscCall(MatDestroy(&ab->Ae));
  PetscCall(MatDestroy(&ab->Be));
  PetscCall(PetscFree(ab));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
  PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
  PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
  PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local sizes of A and B */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
  PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(Ae,MATELEMENTAL));
  PetscCall(MatSetUp(Ae));
  PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));

  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
  PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
  PetscCall(MatSetType(Be,MATELEMENTAL));
  PetscCall(MatSetUp(Be));
  PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));

  /* compute symbolic Ce = Ae*Be */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
  PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));

  /* setup C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
  PetscCall(MatSetType(C,MATDENSE));
  PetscCall(MatSetUp(C));

  /* create data structure for reusing C */
  PetscCall(PetscNew(&ab));
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data    = ab;
  C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif
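
/*
   Example (sketch): the Elemental-backed path above serves plain MatMatMult()
   for two MATMPIDENSE matrices; it is compiled only when PETSc is configured
   with Elemental (e.g. --download-elemental):

     Mat C;
     PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
*/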
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg,"default",&flg));
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  }
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
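
/*
   Runtime selection (sketch): which option name applies depends on how the
   product was created, mirroring the api_user branch above:

     -matmattransmult_mpidense_mpidense_via cyclic   (MatMatTransposeMult() interface)
     -mat_product_algorithm cyclic                   (MatProduct interface)

   Alternatively, call MatProductSetAlgorithm(C,"cyclic") before
   MatProductSetFromOptions(C).
*/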

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
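
/*
   Example (sketch): typical MatProduct workflow that dispatches through
   MatProductSetFromOptions_MPIDense() above; A and B are assumed to be
   MATMPIDENSE matrices of compatible sizes.

     Mat C;
     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_ABt));
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/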