1 
2 /*
3    Basic functions for parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix
12               that represents the locally owned part of the operator; a MATSEQDENSE
13               matrix is returned as itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscBool      flg;
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
31   PetscValidPointer(B,2);
32   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
33   if (flg) *B = mat->A;
34   else {
35     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
36     PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
37     *B = A;
38   }
39   PetscFunctionReturn(0);
40 }
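
/*
  A minimal usage sketch (assumes "A" is an already assembled MATMPIDENSE or MATSEQDENSE
  matrix; error handling abbreviated). The returned inner matrix is borrowed, not owned,
  so the caller must not destroy it:

    Mat         Aloc;
    PetscScalar *a;

    PetscCall(MatDenseGetLocalMatrix(A,&Aloc));
    PetscCall(MatDenseGetArray(Aloc,&a));      // column-major storage of the local rows
    // ... operate on the local array a ...
    PetscCall(MatDenseRestoreArray(Aloc,&a));
*/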
41 
42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
43 {
44   Mat_MPIDense   *Amat = (Mat_MPIDense*)A->data;
45   Mat_MPIDense   *Bmat = (Mat_MPIDense*)B->data;
46 
47   PetscFunctionBegin;
48   PetscCall(MatCopy(Amat->A,Bmat->A,s));
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
56 
57   PetscFunctionBegin;
58   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
59   lrow = row - rstart;
60   PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
61   PetscFunctionReturn(0);
62 }
63 
64 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
65 {
66   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
67   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
68 
69   PetscFunctionBegin;
70   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
71   lrow = row - rstart;
72   PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
73   PetscFunctionReturn(0);
74 }
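
/*
  A minimal sketch of the resulting row access pattern (assumes "A" is a MATMPIDENSE
  matrix; as checked above, only locally owned rows may be queried):

    PetscInt          rstart,rend,row,nz;
    const PetscInt    *cols;
    const PetscScalar *vals;

    PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
    for (row=rstart; row<rend; row++) {
      PetscCall(MatGetRow(A,row,&nz,&cols,&vals));
      // ... use cols[] and vals[]; for a dense matrix nz equals the global column count ...
      PetscCall(MatRestoreRow(A,row,&nz,&cols,&vals));
    }
*/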
75 
76 PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
77 {
78   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
79   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
80   PetscScalar    *array;
81   MPI_Comm       comm;
82   PetscBool      flg;
83   Mat            B;
84 
85   PetscFunctionBegin;
86   PetscCall(MatHasCongruentLayouts(A,&flg));
87   PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices with congruent layouts are supported");
88   PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
89   if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
90 
91     PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
92     PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
93     PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
94     PetscCall(MatCreate(comm,&B));
95     PetscCall(MatSetSizes(B,m,m,m,m));
96     PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
97     PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
98     PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
99     PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
100     PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
101     PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
102     PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
103     *a   = B;
104     PetscCall(MatDestroy(&B));
105   } else *a = B;
106   PetscFunctionReturn(0);
107 }
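
/*
  A minimal usage sketch: the block returned by MatGetDiagonalBlock() is cached on the
  matrix (composed as "DiagonalBlock" above) and remains owned by it:

    Mat D;
    PetscCall(MatGetDiagonalBlock(A,&D)); // D aliases the locally owned diagonal block; do not destroy it
*/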
108 
109 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
110 {
111   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
112   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
113   PetscBool      roworiented = A->roworiented;
114 
115   PetscFunctionBegin;
116   for (i=0; i<m; i++) {
117     if (idxm[i] < 0) continue;
118       PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
119     if (idxm[i] >= rstart && idxm[i] < rend) {
120       row = idxm[i] - rstart;
121       if (roworiented) {
122         PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
123       } else {
124         for (j=0; j<n; j++) {
125           if (idxn[j] < 0) continue;
126           PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
127           PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
128         }
129       }
130     } else if (!A->donotstash) {
131       mat->assembled = PETSC_FALSE;
132       if (roworiented) {
133         PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
134       } else {
135         PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
136       }
137     }
138   }
139   PetscFunctionReturn(0);
140 }
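
/*
  A minimal sketch of how entries reach this routine (assumes "A" is a MATMPIDENSE
  matrix). Off-process entries are placed in the stash and only arrive at their
  owning rank during assembly:

    PetscInt    i = 0,j = 0;  // any global row/column indices, owned or not
    PetscScalar v = 1.0;

    PetscCall(MatSetValues(A,1,&i,1,&j,&v,ADD_VALUES));
    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); // ships stashed off-process entries
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/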
141 
142 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
143 {
144   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
145   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
146 
147   PetscFunctionBegin;
148   for (i=0; i<m; i++) {
149     if (idxm[i] < 0) continue; /* negative row */
150     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
151     if (idxm[i] >= rstart && idxm[i] < rend) {
152       row = idxm[i] - rstart;
153       for (j=0; j<n; j++) {
154         if (idxn[j] < 0) continue; /* negative column */
155         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
156         PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
157       }
158     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
159   }
160   PetscFunctionReturn(0);
161 }
162 
163 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
164 {
165   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
166 
167   PetscFunctionBegin;
168   PetscCall(MatDenseGetLDA(a->A,lda));
169   PetscFunctionReturn(0);
170 }
171 
172 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscBool      iscuda;
176 
177   PetscFunctionBegin;
178   if (!a->A) {
179     PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
180     PetscCall(PetscLayoutSetUp(A->rmap));
181     PetscCall(PetscLayoutSetUp(A->cmap));
182     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
183     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
184     PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
185     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
186     PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
187   }
188   PetscCall(MatDenseSetLDA(a->A,lda));
189   PetscFunctionReturn(0);
190 }
191 
192 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
193 {
194   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
195 
196   PetscFunctionBegin;
197   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
198   PetscCall(MatDenseGetArray(a->A,array));
199   PetscFunctionReturn(0);
200 }
201 
202 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
203 {
204   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
205 
206   PetscFunctionBegin;
207   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208   PetscCall(MatDenseGetArrayRead(a->A,array));
209   PetscFunctionReturn(0);
210 }
211 
212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
213 {
214   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
215 
216   PetscFunctionBegin;
217   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
218   PetscCall(MatDenseGetArrayWrite(a->A,array));
219   PetscFunctionReturn(0);
220 }
221 
222 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
223 {
224   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
225 
226   PetscFunctionBegin;
227   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
228   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
229   PetscCall(MatDensePlaceArray(a->A,array));
230   PetscFunctionReturn(0);
231 }
232 
233 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
234 {
235   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
236 
237   PetscFunctionBegin;
238   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
239   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
240   PetscCall(MatDenseResetArray(a->A));
241   PetscFunctionReturn(0);
242 }
243 
244 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
245 {
246   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
247 
248   PetscFunctionBegin;
249   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
250   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
251   PetscCall(MatDenseReplaceArray(a->A,array));
252   PetscFunctionReturn(0);
253 }
254 
255 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
256 {
257   Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
258   PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
259   const PetscInt    *irow,*icol;
260   const PetscScalar *v;
261   PetscScalar       *bv;
262   Mat               newmat;
263   IS                iscol_local;
264   MPI_Comm          comm_is,comm_mat;
265 
266   PetscFunctionBegin;
267   PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
268   PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
269   PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");
270 
271   PetscCall(ISAllGather(iscol,&iscol_local));
272   PetscCall(ISGetIndices(isrow,&irow));
273   PetscCall(ISGetIndices(iscol_local,&icol));
274   PetscCall(ISGetLocalSize(isrow,&nrows));
275   PetscCall(ISGetLocalSize(iscol,&ncols));
276   PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */
277 
278   /* No parallel redistribution is currently supported! Each index set should really be
279      checked to confirm that it is valid. Currently only submatrices with the same row
280      partitioning as the original matrix are supported! */
281 
282   PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
283   PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
284 
285   /* Check submatrix call */
286   if (scall == MAT_REUSE_MATRIX) {
287     /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
288     /* The row and column sizes of the reused matrix really do need to be tested here! */
289     newmat = *B;
290   } else {
291     /* Create and fill new matrix */
292     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
293     PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
294     PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
295     PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
296   }
297 
298   /* Now extract the data pointers and do the copy, column at a time */
299   newmatd = (Mat_MPIDense*)newmat->data;
300   PetscCall(MatDenseGetArray(newmatd->A,&bv));
301   PetscCall(MatDenseGetArrayRead(mat->A,&v));
302   PetscCall(MatDenseGetLDA(mat->A,&lda));
303   for (i=0; i<Ncols; i++) {
304     const PetscScalar *av = v + lda*icol[i];
305     for (j=0; j<nrows; j++) {
306       *bv++ = av[irow[j] - rstart];
307     }
308   }
309   PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
310   PetscCall(MatDenseRestoreArray(newmatd->A,&bv));
311 
312   /* Assemble the matrices so that the correct flags are set */
313   PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
314   PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));
315 
316   /* Free work space */
317   PetscCall(ISRestoreIndices(isrow,&irow));
318   PetscCall(ISRestoreIndices(iscol_local,&icol));
319   PetscCall(ISDestroy(&iscol_local));
320   *B   = newmat;
321   PetscFunctionReturn(0);
322 }
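
/*
  A minimal usage sketch (per the note above, the row index set must follow the row
  partitioning of A; "ncols" and "cstart" are hypothetical local column counts/offsets
  whose local sizes must sum to the submatrix's global column count):

    IS       isrow,iscol;
    Mat      B;
    PetscInt rstart,rend,ncols,cstart;

    PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow)); // keep A's row partitioning
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A),ncols,cstart,1,&iscol));
    PetscCall(MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B));
*/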
323 
324 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
325 {
326   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
327 
328   PetscFunctionBegin;
329   PetscCall(MatDenseRestoreArray(a->A,array));
330   PetscFunctionReturn(0);
331 }
332 
333 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
334 {
335   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
336 
337   PetscFunctionBegin;
338   PetscCall(MatDenseRestoreArrayRead(a->A,array));
339   PetscFunctionReturn(0);
340 }
341 
342 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
343 {
344   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
345 
346   PetscFunctionBegin;
347   PetscCall(MatDenseRestoreArrayWrite(a->A,array));
348   PetscFunctionReturn(0);
349 }
350 
351 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
352 {
353   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
354   PetscInt       nstash,reallocs;
355 
356   PetscFunctionBegin;
357   if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
358 
359   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
360   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
361   PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
362   PetscFunctionReturn(0);
363 }
364 
365 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
366 {
367   Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
368   PetscInt       i,*row,*col,flg,j,rstart,ncols;
369   PetscMPIInt    n;
370   PetscScalar    *val;
371 
372   PetscFunctionBegin;
373   if (!mdn->donotstash && !mat->nooffprocentries) {
374     /*  wait on receives */
375     while (1) {
376       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
377       if (!flg) break;
378 
379       for (i=0; i<n;) {
380         /* Now identify the consecutive vals belonging to the same row */
381         for (j=i,rstart=row[j]; j<n; j++) {
382           if (row[j] != rstart) break;
383         }
384         if (j < n) ncols = j-i;
385         else       ncols = n-i;
386         /* Now assemble all these values with a single function call */
387         PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
388         i    = j;
389       }
390     }
391     PetscCall(MatStashScatterEnd_Private(&mat->stash));
392   }
393 
394   PetscCall(MatAssemblyBegin(mdn->A,mode));
395   PetscCall(MatAssemblyEnd(mdn->A,mode));
396   PetscFunctionReturn(0);
397 }
398 
399 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
400 {
401   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
402 
403   PetscFunctionBegin;
404   PetscCall(MatZeroEntries(l->A));
405   PetscFunctionReturn(0);
406 }
407 
408 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
409 {
410   Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
411   PetscInt          i,len,*lrows;
412 
413   PetscFunctionBegin;
414   /* get locally owned rows */
415   PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
416   /* fix right hand side if needed */
417   if (x && b) {
418     const PetscScalar *xx;
419     PetscScalar       *bb;
420 
421     PetscCall(VecGetArrayRead(x, &xx));
422     PetscCall(VecGetArrayWrite(b, &bb));
423     for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
424     PetscCall(VecRestoreArrayRead(x, &xx));
425     PetscCall(VecRestoreArrayWrite(b, &bb));
426   }
427   PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
428   if (diag != 0.0) {
429     Vec d;
430 
431     PetscCall(MatCreateVecs(A,NULL,&d));
432     PetscCall(VecSet(d,diag));
433     PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
434     PetscCall(VecDestroy(&d));
435   }
436   PetscCall(PetscFree(lrows));
437   PetscFunctionReturn(0);
438 }
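
/*
  A minimal usage sketch (row indices are global; "x" and "b" are assumed to be
  conforming vectors). With a nonzero diag the diagonal is reinstated through
  MatDiagonalSet() as above, and b is fixed up so that b[row] = diag*x[row]:

    PetscInt rows[2] = {0,1};
    PetscCall(MatZeroRows(A,2,rows,1.0,x,b));
*/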
439 
440 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
441 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
442 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
443 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
444 
445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
446 {
447   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
448   const PetscScalar *ax;
449   PetscScalar       *ay;
450   PetscMemType      axmtype,aymtype;
451 
452   PetscFunctionBegin;
453   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
454   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
455   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
456   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
457   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
458   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
459   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
460   PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
461   PetscFunctionReturn(0);
462 }
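
/*
  A minimal usage sketch: internally the input vector is first broadcast into the local
  work vector lvec through the PetscSF, then the sequential kernel is applied to the
  locally owned rows:

    Vec x,y;
    PetscCall(MatCreateVecs(A,&x,&y)); // x conforms to the columns of A, y to the rows
    PetscCall(VecSet(x,1.0));
    PetscCall(MatMult(A,x,y));
*/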
463 
464 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
465 {
466   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
467   const PetscScalar *ax;
468   PetscScalar       *ay;
469   PetscMemType      axmtype,aymtype;
470 
471   PetscFunctionBegin;
472   if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
473   PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
474   PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
475   PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
476   PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
477   PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
478   PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
479   PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
480   PetscFunctionReturn(0);
481 }
482 
483 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
484 {
485   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
486   const PetscScalar *ax;
487   PetscScalar       *ay;
488   PetscMemType      axmtype,aymtype;
489 
490   PetscFunctionBegin;
491   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
492   PetscCall(VecSet(yy,0.0));
493   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
494   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
495   PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
496   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
497   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
498   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
499   PetscCall(VecRestoreArrayAndMemType(yy,&ay));
500   PetscFunctionReturn(0);
501 }
502 
503 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
504 {
505   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
506   const PetscScalar *ax;
507   PetscScalar       *ay;
508   PetscMemType      axmtype,aymtype;
509 
510   PetscFunctionBegin;
511   if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
512   PetscCall(VecCopy(yy,zz));
513   PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
514   PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
515   PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
516   PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
517   PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
518   PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
519   PetscCall(VecRestoreArrayAndMemType(zz,&ay));
520   PetscFunctionReturn(0);
521 }
522 
523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
524 {
525   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
526   PetscInt          lda,len,i,n,radd;
527   PetscScalar       *x,zero = 0.0;
528   const PetscScalar *av;
529 
530   PetscFunctionBegin;
531   PetscCall(VecSet(v,zero));
532   PetscCall(VecGetArray(v,&x));
533   PetscCall(VecGetSize(v,&n));
534   PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
535   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
536   PetscCall(MatDenseGetArrayRead(a->A,&av));
537   PetscCall(MatDenseGetLDA(a->A,&lda));
538   radd = A->rmap->rstart*lda; /* offset of column rstart, where the diagonal block begins; use lda in case it differs from the local row count */
539   for (i=0; i<len; i++) {
540     x[i] = av[radd + i*lda + i];
541   }
542   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
543   PetscCall(VecRestoreArray(v,&x));
544   PetscFunctionReturn(0);
545 }
546 
547 PetscErrorCode MatDestroy_MPIDense(Mat mat)
548 {
549   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
550 
551   PetscFunctionBegin;
552 #if defined(PETSC_USE_LOG)
553   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
554 #endif
555   PetscCall(MatStashDestroy_Private(&mat->stash));
556   PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
557   PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
558   PetscCall(MatDestroy(&mdn->A));
559   PetscCall(VecDestroy(&mdn->lvec));
560   PetscCall(PetscSFDestroy(&mdn->Mvctx));
561   PetscCall(VecDestroy(&mdn->cvec));
562   PetscCall(MatDestroy(&mdn->cmat));
563 
564   PetscCall(PetscFree(mat->data));
565   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
566 
567   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
568   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
569   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
570   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
571   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
572   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
573   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
574   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
575   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
576   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
577   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
578   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
579   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
580 #if defined(PETSC_HAVE_ELEMENTAL)
581   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
582 #endif
583 #if defined(PETSC_HAVE_SCALAPACK)
584   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
585 #endif
586   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
587   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
588   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
589 #if defined (PETSC_HAVE_CUDA)
590   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
591   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
592 #endif
593   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
594   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
595   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
596   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
597   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
598   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
599   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
600   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
601   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
602   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
603   PetscFunctionReturn(0);
604 }
605 
606 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
607 
608 #include <petscdraw.h>
609 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
610 {
611   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
612   PetscMPIInt       rank;
613   PetscViewerType   vtype;
614   PetscBool         iascii,isdraw;
615   PetscViewer       sviewer;
616   PetscViewerFormat format;
617 
618   PetscFunctionBegin;
619   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
620   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
621   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
622   if (iascii) {
623     PetscCall(PetscViewerGetType(viewer,&vtype));
624     PetscCall(PetscViewerGetFormat(viewer,&format));
625     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
626       MatInfo info;
627       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
628       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
629       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
630       PetscCall(PetscViewerFlush(viewer));
631       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
632       if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
633       PetscFunctionReturn(0);
634     } else if (format == PETSC_VIEWER_ASCII_INFO) {
635       PetscFunctionReturn(0);
636     }
637   } else if (isdraw) {
638     PetscDraw draw;
639     PetscBool isnull;
640 
641     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
642     PetscCall(PetscDrawIsNull(draw,&isnull));
643     if (isnull) PetscFunctionReturn(0);
644   }
645 
646   {
647     /* assemble the entire matrix onto the first process. */
648     Mat         A;
649     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
650     PetscInt    *cols;
651     PetscScalar *vals;
652 
653     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
654     if (rank == 0) {
655       PetscCall(MatSetSizes(A,M,N,M,N));
656     } else {
657       PetscCall(MatSetSizes(A,0,0,M,N));
658     }
659     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
660     PetscCall(MatSetType(A,MATMPIDENSE));
661     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
662     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
663 
664     /* Copy the matrix ... This isn't the most efficient approach,
665        but it is simple and quick enough for now */
666     A->insertmode = INSERT_VALUES;
667 
668     row = mat->rmap->rstart;
669     m   = mdn->A->rmap->n;
670     for (i=0; i<m; i++) {
671       PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
672       PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
673       PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
674       row++;
675     }
676 
677     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
678     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
679     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
680     if (rank == 0) {
681       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
682       PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
683     }
684     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
685     PetscCall(PetscViewerFlush(viewer));
686     PetscCall(MatDestroy(&A));
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
692 {
693   PetscBool      iascii,isbinary,isdraw,issocket;
694 
695   PetscFunctionBegin;
696   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
697   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
698   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
699   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
700 
701   if (iascii || issocket || isdraw) {
702     PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
703   } else if (isbinary) {
704     PetscCall(MatView_Dense_Binary(mat,viewer));
705   }
706   PetscFunctionReturn(0);
707 }
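
/*
  A minimal usage sketch: ASCII, draw, and socket viewers gather the matrix onto the
  first process as above, while binary viewers use the dedicated dense binary format:

    PetscCall(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
    PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL));
    PetscCall(MatView(A,PETSC_VIEWER_STDOUT_WORLD));
    PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/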
708 
709 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
710 {
711   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
712   Mat            mdn  = mat->A;
713   PetscLogDouble isend[5],irecv[5];
714 
715   PetscFunctionBegin;
716   info->block_size = 1.0;
717 
718   PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));
719 
720   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
721   isend[3] = info->memory;  isend[4] = info->mallocs;
722   if (flag == MAT_LOCAL) {
723     info->nz_used      = isend[0];
724     info->nz_allocated = isend[1];
725     info->nz_unneeded  = isend[2];
726     info->memory       = isend[3];
727     info->mallocs      = isend[4];
728   } else if (flag == MAT_GLOBAL_MAX) {
729     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));
730 
731     info->nz_used      = irecv[0];
732     info->nz_allocated = irecv[1];
733     info->nz_unneeded  = irecv[2];
734     info->memory       = irecv[3];
735     info->mallocs      = irecv[4];
736   } else if (flag == MAT_GLOBAL_SUM) {
737     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));
738 
739     info->nz_used      = irecv[0];
740     info->nz_allocated = irecv[1];
741     info->nz_unneeded  = irecv[2];
742     info->memory       = irecv[3];
743     info->mallocs      = irecv[4];
744   }
745   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
746   info->fill_ratio_needed = 0;
747   info->factor_mallocs    = 0;
748   PetscFunctionReturn(0);
749 }
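
/*
  A minimal usage sketch: MAT_LOCAL reports this process only, while MAT_GLOBAL_MAX and
  MAT_GLOBAL_SUM reduce the five counters across the communicator as above:

    MatInfo info;
    PetscCall(MatGetInfo(A,MAT_GLOBAL_SUM,&info));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD,"memory used: %g\n",(double)info.memory));
*/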
750 
751 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
752 {
753   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
754 
755   PetscFunctionBegin;
756   switch (op) {
757   case MAT_NEW_NONZERO_LOCATIONS:
758   case MAT_NEW_NONZERO_LOCATION_ERR:
759   case MAT_NEW_NONZERO_ALLOCATION_ERR:
760     MatCheckPreallocated(A,1);
761     PetscCall(MatSetOption(a->A,op,flg));
762     break;
763   case MAT_ROW_ORIENTED:
764     MatCheckPreallocated(A,1);
765     a->roworiented = flg;
766     PetscCall(MatSetOption(a->A,op,flg));
767     break;
768   case MAT_FORCE_DIAGONAL_ENTRIES:
769   case MAT_KEEP_NONZERO_PATTERN:
770   case MAT_USE_HASH_TABLE:
771   case MAT_SORTED_FULL:
772     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
773     break;
774   case MAT_IGNORE_OFF_PROC_ENTRIES:
775     a->donotstash = flg;
776     break;
777   case MAT_SYMMETRIC:
778   case MAT_STRUCTURALLY_SYMMETRIC:
779   case MAT_HERMITIAN:
780   case MAT_SYMMETRY_ETERNAL:
781   case MAT_IGNORE_LOWER_TRIANGULAR:
782   case MAT_IGNORE_ZERO_ENTRIES:
783     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
784     break;
785   default:
786     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unknown option %s",MatOptions[op]);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
792 {
793   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
794   const PetscScalar *l;
795   PetscScalar       x,*v,*vv,*r;
796   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
797 
798   PetscFunctionBegin;
799   PetscCall(MatDenseGetArray(mdn->A,&vv));
800   PetscCall(MatDenseGetLDA(mdn->A,&lda));
801   PetscCall(MatGetLocalSize(A,&s2,&s3));
802   if (ll) {
803     PetscCall(VecGetLocalSize(ll,&s2a));
804     PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
805     PetscCall(VecGetArrayRead(ll,&l));
806     for (i=0; i<m; i++) {
807       x = l[i];
808       v = vv + i;
809       for (j=0; j<n; j++) { (*v) *= x; v += lda; }
810     }
811     PetscCall(VecRestoreArrayRead(ll,&l));
812     PetscCall(PetscLogFlops(1.0*n*m));
813   }
814   if (rr) {
815     const PetscScalar *ar;
816 
817     PetscCall(VecGetLocalSize(rr,&s3a));
818     PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
819     PetscCall(VecGetArrayRead(rr,&ar));
820     if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
821     PetscCall(VecGetArray(mdn->lvec,&r));
822     PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
823     PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
824     PetscCall(VecRestoreArrayRead(rr,&ar));
825     for (i=0; i<n; i++) {
826       x = r[i];
827       v = vv + i*lda;
828       for (j=0; j<m; j++) (*v++) *= x;
829     }
830     PetscCall(VecRestoreArray(mdn->lvec,&r));
831     PetscCall(PetscLogFlops(1.0*n*m));
832   }
833   PetscCall(MatDenseRestoreArray(mdn->A,&vv));
834   PetscFunctionReturn(0);
835 }
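
/*
  A minimal usage sketch: computes A <- diag(ll) A diag(rr); ll conforms to the row
  layout and rr to the column layout, and either argument may be NULL to skip that side:

    Vec l,r;
    PetscCall(MatCreateVecs(A,&r,&l)); // r conforms to the columns, l to the rows
    PetscCall(VecSet(l,2.0));
    PetscCall(VecSet(r,0.5));
    PetscCall(MatDiagonalScale(A,l,r));
*/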
836 
837 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
838 {
839   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
840   PetscInt          i,j;
841   PetscMPIInt       size;
842   PetscReal         sum = 0.0;
843   const PetscScalar *av,*v;
844 
845   PetscFunctionBegin;
846   PetscCall(MatDenseGetArrayRead(mdn->A,&av));
847   v    = av;
848   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
849   if (size == 1) {
850     PetscCall(MatNorm(mdn->A,type,nrm));
851   } else {
852     if (type == NORM_FROBENIUS) { /* assumes the inner matrix is stored contiguously, i.e. lda equals the local row count */
853       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
854         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
855       }
856       PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
857       *nrm = PetscSqrtReal(*nrm);
858       PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
859     } else if (type == NORM_1) {
860       PetscReal *tmp,*tmp2;
861       PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
862       *nrm = 0.0;
863       v    = av;
864       for (j=0; j<mdn->A->cmap->n; j++) {
865         for (i=0; i<mdn->A->rmap->n; i++) {
866           tmp[j] += PetscAbsScalar(*v);  v++;
867         }
868       }
869       PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
870       for (j=0; j<A->cmap->N; j++) {
871         if (tmp2[j] > *nrm) *nrm = tmp2[j];
872       }
873       PetscCall(PetscFree2(tmp,tmp2));
874       PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
875     } else if (type == NORM_INFINITY) { /* max row norm */
876       PetscReal ntemp;
877       PetscCall(MatNorm(mdn->A,type,&ntemp));
878       PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
879     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for the two norm");
880   }
881   PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
882   PetscFunctionReturn(0);
883 }
884 
885 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
886 {
887   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
888   Mat            B;
889   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
890   PetscInt       j,i,lda;
891   PetscScalar    *v;
892 
893   PetscFunctionBegin;
894   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
895     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
896     PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
897     PetscCall(MatSetType(B,((PetscObject)A)->type_name));
898     PetscCall(MatMPIDenseSetPreallocation(B,NULL));
899   } else B = *matout;
900 
901   m    = a->A->rmap->n; n = a->A->cmap->n;
902   PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
903   PetscCall(MatDenseGetLDA(a->A,&lda));
904   PetscCall(PetscMalloc1(m,&rwork));
905   for (i=0; i<m; i++) rwork[i] = rstart + i;
906   for (j=0; j<n; j++) {
907     PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
908     v   += lda;
909   }
910   PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
911   PetscCall(PetscFree(rwork));
912   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
913   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
914   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
915     *matout = B;
916   } else {
917     PetscCall(MatHeaderMerge(A,&B));
918   }
919   PetscFunctionReturn(0);
920 }
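
/*
  A minimal usage sketch: MAT_INITIAL_MATRIX creates a new matrix, MAT_REUSE_MATRIX
  refills one obtained earlier this way, and MAT_INPLACE_MATRIX replaces A itself
  (through the MatHeaderMerge() above):

    Mat At;
    PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
    PetscCall(MatTranspose(A,MAT_INPLACE_MATRIX,&A));
*/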
921 
922 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
923 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
924 
925 PetscErrorCode MatSetUp_MPIDense(Mat A)
926 {
927   PetscFunctionBegin;
928   PetscCall(PetscLayoutSetUp(A->rmap));
929   PetscCall(PetscLayoutSetUp(A->cmap));
930   if (!A->preallocated) {
931     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
937 {
938   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
939 
940   PetscFunctionBegin;
941   PetscCall(MatAXPY(A->A,alpha,B->A,str));
942   PetscFunctionReturn(0);
943 }
944 
945 PetscErrorCode MatConjugate_MPIDense(Mat mat)
946 {
947   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
948 
949   PetscFunctionBegin;
950   PetscCall(MatConjugate(a->A));
951   PetscFunctionReturn(0);
952 }
953 
954 PetscErrorCode MatRealPart_MPIDense(Mat A)
955 {
956   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
957 
958   PetscFunctionBegin;
959   PetscCall(MatRealPart(a->A));
960   PetscFunctionReturn(0);
961 }
962 
963 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
964 {
965   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
966 
967   PetscFunctionBegin;
968   PetscCall(MatImaginaryPart(a->A));
969   PetscFunctionReturn(0);
970 }
971 
972 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
973 {
974   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
975 
976   PetscFunctionBegin;
977   PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
978   PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
979   PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
980   PetscFunctionReturn(0);
981 }
982 
983 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);
984 
985 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
986 {
987   PetscInt       i,m,n;
988   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
989   PetscReal      *work;
990 
991   PetscFunctionBegin;
992   PetscCall(MatGetSize(A,&m,&n));
993   PetscCall(PetscMalloc1(n,&work));
994   if (type == REDUCTION_MEAN_REALPART) {
995     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
996   } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
997     PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
998   } else {
999     PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
1000   }
1001   if (type == NORM_2) {
1002     for (i=0; i<n; i++) work[i] *= work[i];
1003   }
1004   if (type == NORM_INFINITY) {
1005     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
1006   } else {
1007     PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
1008   }
1009   PetscCall(PetscFree(work));
1010   if (type == NORM_2) {
1011     for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
1012   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
1013     for (i=0; i<n; i++) reductions[i] /= m;
1014   }
1015   PetscFunctionReturn(0);
1016 }
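
/*
  A minimal sketch using the public wrapper MatGetColumnNorms(); the output array must
  have as many entries as there are global columns:

    PetscInt  N;
    PetscReal *norms;

    PetscCall(MatGetSize(A,NULL,&N));
    PetscCall(PetscMalloc1(N,&norms));
    PetscCall(MatGetColumnNorms(A,NORM_2,norms));
    PetscCall(PetscFree(norms));
*/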
1017 
1018 #if defined(PETSC_HAVE_CUDA)
1019 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1020 {
1021   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1022   PetscInt       lda;
1023 
1024   PetscFunctionBegin;
1025   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1026   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1027   if (!a->cvec) {
1028     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1029     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1030   }
1031   a->vecinuse = col + 1;
1032   PetscCall(MatDenseGetLDA(a->A,&lda));
1033   PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1034   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1035   *v   = a->cvec;
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1040 {
1041   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1042 
1043   PetscFunctionBegin;
1044   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1045   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1046   a->vecinuse = 0;
1047   PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1048   PetscCall(VecCUDAResetArray(a->cvec));
1049   *v   = NULL;
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1054 {
1055   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1056   PetscInt       lda;
1057 
1058   PetscFunctionBegin;
1059   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1060   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1061   if (!a->cvec) {
1062     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1063     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1064   }
1065   a->vecinuse = col + 1;
1066   PetscCall(MatDenseGetLDA(a->A,&lda));
1067   PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
1068   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1069   PetscCall(VecLockReadPush(a->cvec));
1070   *v   = a->cvec;
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1075 {
1076   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1077 
1078   PetscFunctionBegin;
1079   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1080   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1081   a->vecinuse = 0;
1082   PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
1083   PetscCall(VecLockReadPop(a->cvec));
1084   PetscCall(VecCUDAResetArray(a->cvec));
1085   *v   = NULL;
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1090 {
1091   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1092   PetscInt       lda;
1093 
1094   PetscFunctionBegin;
1095   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1096   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1097   if (!a->cvec) {
1098     PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1099     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
1100   }
1101   a->vecinuse = col + 1;
1102   PetscCall(MatDenseGetLDA(a->A,&lda));
1103   PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1104   PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1105   *v   = a->cvec;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
1110 {
1111   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1112 
1113   PetscFunctionBegin;
1114   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1115   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1116   a->vecinuse = 0;
1117   PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1118   PetscCall(VecCUDAResetArray(a->cvec));
1119   *v   = NULL;
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1124 {
1125   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1126 
1127   PetscFunctionBegin;
1128   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1129   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1130   PetscCall(MatDenseCUDAPlaceArray(l->A,a));
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1135 {
1136   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1137 
1138   PetscFunctionBegin;
1139   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1140   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1141   PetscCall(MatDenseCUDAResetArray(l->A));
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1146 {
1147   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1148 
1149   PetscFunctionBegin;
1150   PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1151   PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1152   PetscCall(MatDenseCUDAReplaceArray(l->A,a));
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1157 {
1158   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1159 
1160   PetscFunctionBegin;
1161   PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1166 {
1167   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1168 
1169   PetscFunctionBegin;
1170   PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1175 {
1176   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1184 {
1185   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1193 {
1194   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1195 
1196   PetscFunctionBegin;
1197   PetscCall(MatDenseCUDAGetArray(l->A,a));
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1202 {
1203   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1204 
1205   PetscFunctionBegin;
1206   PetscCall(MatDenseCUDARestoreArray(l->A,a));
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1211 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1212 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1213 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1214 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1215 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1216 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1217 
1218 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
1219 {
1220   Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
1221 
1222   PetscFunctionBegin;
1223   PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1224   PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1225   if (d->A) {
1226     PetscCall(MatBindToCPU(d->A,bind));
1227   }
1228   mat->boundtocpu = bind;
1229   if (!bind) {
1230     PetscBool iscuda;
1231 
1232     PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
1233     if (!iscuda) {
1234       PetscCall(VecDestroy(&d->cvec));
1235     }
1236     PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
1237     if (!iscuda) {
1238       PetscCall(MatDestroy(&d->cmat));
1239     }
1240     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
1241     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
1242     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
1243     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
1244     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
1245     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
1246   } else {
1247     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1248     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1249     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1250     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1251     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1252     PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1253   }
1254   if (d->cmat) {
1255     PetscCall(MatBindToCPU(d->cmat,bind));
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1261 {
1262   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1263   PetscBool      iscuda;
1264 
1265   PetscFunctionBegin;
1266   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1267   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
1268   if (!iscuda) PetscFunctionReturn(0);
1269   PetscCall(PetscLayoutSetUp(A->rmap));
1270   PetscCall(PetscLayoutSetUp(A->cmap));
1271   if (!d->A) {
1272     PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
1273     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
1274     PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
1275   }
1276   PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
1277   PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
1278   A->preallocated = PETSC_TRUE;
1279   A->assembled = PETSC_TRUE;
1280   PetscFunctionReturn(0);
1281 }
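
/*
  A minimal usage sketch (only meaningful in PETSC_HAVE_CUDA builds); passing NULL for
  d_data lets PETSc allocate the device array itself; "m" and "n" are hypothetical
  local sizes:

    Mat      A;
    PetscInt m,n;

    PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
    PetscCall(MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE));
    PetscCall(MatSetType(A,MATMPIDENSECUDA));
    PetscCall(MatMPIDenseCUDASetPreallocation(A,NULL));
*/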
1282 #endif
1283 
1284 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1285 {
1286   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1287 
1288   PetscFunctionBegin;
1289   PetscCall(MatSetRandom(d->A,rctx));
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1294 {
1295   PetscFunctionBegin;
1296   *missing = PETSC_FALSE;
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1301 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1302 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1303 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1304 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1305 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1306 
1307 /* -------------------------------------------------------------------*/
1308 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1309                                         MatGetRow_MPIDense,
1310                                         MatRestoreRow_MPIDense,
1311                                         MatMult_MPIDense,
1312                                 /*  4*/ MatMultAdd_MPIDense,
1313                                         MatMultTranspose_MPIDense,
1314                                         MatMultTransposeAdd_MPIDense,
1315                                         NULL,
1316                                         NULL,
1317                                         NULL,
1318                                 /* 10*/ NULL,
1319                                         NULL,
1320                                         NULL,
1321                                         NULL,
1322                                         MatTranspose_MPIDense,
1323                                 /* 15*/ MatGetInfo_MPIDense,
1324                                         MatEqual_MPIDense,
1325                                         MatGetDiagonal_MPIDense,
1326                                         MatDiagonalScale_MPIDense,
1327                                         MatNorm_MPIDense,
1328                                 /* 20*/ MatAssemblyBegin_MPIDense,
1329                                         MatAssemblyEnd_MPIDense,
1330                                         MatSetOption_MPIDense,
1331                                         MatZeroEntries_MPIDense,
1332                                 /* 24*/ MatZeroRows_MPIDense,
1333                                         NULL,
1334                                         NULL,
1335                                         NULL,
1336                                         NULL,
1337                                 /* 29*/ MatSetUp_MPIDense,
1338                                         NULL,
1339                                         NULL,
1340                                         MatGetDiagonalBlock_MPIDense,
1341                                         NULL,
1342                                 /* 34*/ MatDuplicate_MPIDense,
1343                                         NULL,
1344                                         NULL,
1345                                         NULL,
1346                                         NULL,
1347                                 /* 39*/ MatAXPY_MPIDense,
1348                                         MatCreateSubMatrices_MPIDense,
1349                                         NULL,
1350                                         MatGetValues_MPIDense,
1351                                         MatCopy_MPIDense,
1352                                 /* 44*/ NULL,
1353                                         MatScale_MPIDense,
1354                                         MatShift_Basic,
1355                                         NULL,
1356                                         NULL,
1357                                 /* 49*/ MatSetRandom_MPIDense,
1358                                         NULL,
1359                                         NULL,
1360                                         NULL,
1361                                         NULL,
1362                                 /* 54*/ NULL,
1363                                         NULL,
1364                                         NULL,
1365                                         NULL,
1366                                         NULL,
1367                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1368                                         MatDestroy_MPIDense,
1369                                         MatView_MPIDense,
1370                                         NULL,
1371                                         NULL,
1372                                 /* 64*/ NULL,
1373                                         NULL,
1374                                         NULL,
1375                                         NULL,
1376                                         NULL,
1377                                 /* 69*/ NULL,
1378                                         NULL,
1379                                         NULL,
1380                                         NULL,
1381                                         NULL,
1382                                 /* 74*/ NULL,
1383                                         NULL,
1384                                         NULL,
1385                                         NULL,
1386                                         NULL,
1387                                 /* 79*/ NULL,
1388                                         NULL,
1389                                         NULL,
1390                                         NULL,
1391                                 /* 83*/ MatLoad_MPIDense,
1392                                         NULL,
1393                                         NULL,
1394                                         NULL,
1395                                         NULL,
1396                                         NULL,
1397                                 /* 89*/ NULL,
1398                                         NULL,
1399                                         NULL,
1400                                         NULL,
1401                                         NULL,
1402                                 /* 94*/ NULL,
1403                                         NULL,
1404                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1405                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1406                                         NULL,
1407                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1408                                         NULL,
1409                                         NULL,
1410                                         MatConjugate_MPIDense,
1411                                         NULL,
1412                                 /*104*/ NULL,
1413                                         MatRealPart_MPIDense,
1414                                         MatImaginaryPart_MPIDense,
1415                                         NULL,
1416                                         NULL,
1417                                 /*109*/ NULL,
1418                                         NULL,
1419                                         NULL,
1420                                         MatGetColumnVector_MPIDense,
1421                                         MatMissingDiagonal_MPIDense,
1422                                 /*114*/ NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                         NULL,
1427                                 /*119*/ NULL,
1428                                         NULL,
1429                                         NULL,
1430                                         NULL,
1431                                         NULL,
1432                                 /*124*/ NULL,
1433                                         MatGetColumnReductions_MPIDense,
1434                                         NULL,
1435                                         NULL,
1436                                         NULL,
1437                                 /*129*/ NULL,
1438                                         NULL,
1439                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1440                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1441                                         NULL,
1442                                 /*134*/ NULL,
1443                                         NULL,
1444                                         NULL,
1445                                         NULL,
1446                                         NULL,
1447                                 /*139*/ NULL,
1448                                         NULL,
1449                                         NULL,
1450                                         NULL,
1451                                         NULL,
1452                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1453                                 /*145*/ NULL,
1454                                         NULL,
1455                                         NULL
1456 };
1457 
1458 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1459 {
1460   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1461   PetscBool      iscuda;
1462 
1463   PetscFunctionBegin;
1464   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1465   PetscCall(PetscLayoutSetUp(mat->rmap));
1466   PetscCall(PetscLayoutSetUp(mat->cmap));
1467   if (!a->A) {
1468     PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
1469     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
1470     PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
1471   }
1472   PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
1473   PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
1474   PetscCall(MatSeqDenseSetPreallocation(a->A,data));
1475   mat->preallocated = PETSC_TRUE;
1476   mat->assembled = PETSC_TRUE;
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1481 {
1482   Mat            B,C;
1483 
1484   PetscFunctionBegin;
1485   PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
1486   PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
1487   PetscCall(MatDestroy(&C));
1488   if (reuse == MAT_REUSE_MATRIX) {
1489     C = *newmat;
1490   } else C = NULL;
1491   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1492   PetscCall(MatDestroy(&B));
1493   if (reuse == MAT_INPLACE_MATRIX) {
1494     PetscCall(MatHeaderReplace(A,&C));
1495   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1500 {
1501   Mat            B,C;
1502 
1503   PetscFunctionBegin;
1504   PetscCall(MatDenseGetLocalMatrix(A,&C));
1505   PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
1506   if (reuse == MAT_REUSE_MATRIX) {
1507     C = *newmat;
1508   } else C = NULL;
1509   PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
1510   PetscCall(MatDestroy(&B));
1511   if (reuse == MAT_INPLACE_MATRIX) {
1512     PetscCall(MatHeaderReplace(A,&C));
1513   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #if defined(PETSC_HAVE_ELEMENTAL)
1518 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1519 {
1520   Mat            mat_elemental;
1521   PetscScalar    *v;
1522   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1523 
1524   PetscFunctionBegin;
1525   if (reuse == MAT_REUSE_MATRIX) {
1526     mat_elemental = *newmat;
1527     PetscCall(MatZeroEntries(*newmat));
1528   } else {
1529     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
1530     PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
1531     PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
1532     PetscCall(MatSetUp(mat_elemental));
1533     PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
1534   }
1535 
1536   PetscCall(PetscMalloc2(m,&rows,N,&cols));
1537   for (i=0; i<N; i++) cols[i] = i;
1538   for (i=0; i<m; i++) rows[i] = rstart + i;
1539 
1540   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1541   PetscCall(MatDenseGetArray(A,&v));
1542   PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
1543   PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
1544   PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
1545   PetscCall(MatDenseRestoreArray(A,&v));
1546   PetscCall(PetscFree2(rows,cols));
1547 
1548   if (reuse == MAT_INPLACE_MATRIX) {
1549     PetscCall(MatHeaderReplace(A,&mat_elemental));
1550   } else {
1551     *newmat = mat_elemental;
1552   }
1553   PetscFunctionReturn(0);
1554 }
1555 #endif
1556 
1557 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1558 {
1559   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1560 
1561   PetscFunctionBegin;
1562   PetscCall(MatDenseGetColumn(mat->A,col,vals));
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1567 {
1568   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1569 
1570   PetscFunctionBegin;
1571   PetscCall(MatDenseRestoreColumn(mat->A,vals));
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1576 {
1577   Mat_MPIDense   *mat;
1578   PetscInt       m,nloc,N;
1579 
1580   PetscFunctionBegin;
1581   PetscCall(MatGetSize(inmat,&m,&N));
1582   PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
1583   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1584     PetscInt sum;
1585 
1586     if (n == PETSC_DECIDE) {
1587       PetscCall(PetscSplitOwnership(comm,&n,&N));
1588     }
1589     /* Check sum(n) = N */
1590     PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
1591     PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);
1592 
1593     PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
1594     PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
1595   }
1596 
1597   /* numeric phase */
1598   mat = (Mat_MPIDense*)(*outmat)->data;
1599   PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
1600   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
1601   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 #if defined(PETSC_HAVE_CUDA)
1606 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1607 {
1608   Mat            B;
1609   Mat_MPIDense   *m;
1610 
1611   PetscFunctionBegin;
1612   if (reuse == MAT_INITIAL_MATRIX) {
1613     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1614   } else if (reuse == MAT_REUSE_MATRIX) {
1615     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1616   }
1617 
1618   B    = *newmat;
1619   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
1620   PetscCall(PetscFree(B->defaultvectype));
1621   PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
1622   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
1623   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
1624   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
1625   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
1626   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
1627   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
1628   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
1629   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
1630   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
1631   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
1632   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
1633   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
1634   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
1635   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
1636   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
1637   m    = (Mat_MPIDense*)(B)->data;
1638   if (m->A) {
1639     PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
1640   }
1641   B->ops->bindtocpu = NULL;
1642   B->offloadmask    = PETSC_OFFLOAD_CPU;
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1647 {
1648   Mat            B;
1649   Mat_MPIDense   *m;
1650 
1651   PetscFunctionBegin;
1652   if (reuse == MAT_INITIAL_MATRIX) {
1653     PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
1654   } else if (reuse == MAT_REUSE_MATRIX) {
1655     PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
1656   }
1657 
1658   B    = *newmat;
1659   PetscCall(PetscFree(B->defaultvectype));
1660   PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
1661   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
1662   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
1663   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
1664   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1665   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
1666   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1667   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
1668   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
1669   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
1670   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
1671   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
1672   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
1673   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
1674   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
1675   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
1676   m    = (Mat_MPIDense*)(B->data);
1677   if (m->A) {
1678     PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
1679     B->offloadmask = PETSC_OFFLOAD_BOTH;
1680   } else {
1681     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1682   }
1683   PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));
1684 
1685   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1686   PetscFunctionReturn(0);
1687 }
1688 #endif
1689 
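/*
  The MatDenseGetColumnVec()/MatDenseRestoreColumnVec() implementations below (and their
  Read/Write variants) expose column col of the local dense block as a distributed Vec
  without copying: the column's storage is placed into a cached vector a->cvec with
  VecPlaceArray() and removed again with VecResetArray() on restore. Only one column
  vector may be outstanding at a time, which is what the vecinuse checks enforce.
*/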
1690 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1691 {
1692   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1693   PetscInt       lda;
1694 
1695   PetscFunctionBegin;
1696   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1697   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1698   if (!a->cvec) {
1699     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1700   }
1701   a->vecinuse = col + 1;
1702   PetscCall(MatDenseGetLDA(a->A,&lda));
1703   PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
1704   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1705   *v   = a->cvec;
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1710 {
1711   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1712 
1713   PetscFunctionBegin;
1714   PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1715   PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1716   a->vecinuse = 0;
1717   PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
1718   PetscCall(VecResetArray(a->cvec));
1719   *v   = NULL;
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1724 {
1725   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1726   PetscInt       lda;
1727 
1728   PetscFunctionBegin;
1729   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1730   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1731   if (!a->cvec) {
1732     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1733   }
1734   a->vecinuse = col + 1;
1735   PetscCall(MatDenseGetLDA(a->A,&lda));
1736   PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
1737   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1738   PetscCall(VecLockReadPush(a->cvec));
1739   *v   = a->cvec;
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1744 {
1745   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1746 
1747   PetscFunctionBegin;
1748   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
1749   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1750   a->vecinuse = 0;
1751   PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
1752   PetscCall(VecLockReadPop(a->cvec));
1753   PetscCall(VecResetArray(a->cvec));
1754   *v   = NULL;
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1759 {
1760   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1761   PetscInt       lda;
1762 
1763   PetscFunctionBegin;
1764   PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1765   PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1766   if (!a->cvec) {
1767     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
1768   }
1769   a->vecinuse = col + 1;
1770   PetscCall(MatDenseGetLDA(a->A,&lda));
1771   PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1772   PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
1773   *v   = a->cvec;
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1778 {
1779   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1780 
1781   PetscFunctionBegin;
1782   PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
1783   PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1784   a->vecinuse = 0;
1785   PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
1786   PetscCall(VecResetArray(a->cvec));
1787   *v   = NULL;
1788   PetscFunctionReturn(0);
1789 }
1790 
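/*
  MatDenseGetSubMatrix()/MatDenseRestoreSubMatrix() expose columns [cbegin,cend) of A as
  a matrix of the same type: a cached matrix a->cmat is (re)configured with the requested
  column layout, and its local block borrows the corresponding columns of a->A through
  MatDenseGetSubMatrix() on the sequential inner matrix. The matinuse flag enforces that
  at most one submatrix is outstanding at a time.
*/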
1791 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
1792 {
1793   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1794   Mat_MPIDense *c;
1795   MPI_Comm     comm;
1796 
1797   PetscFunctionBegin;
1798   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1799   PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1800   PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1801   if (!a->cmat) {
1802     PetscCall(MatCreate(comm,&a->cmat));
1803     PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
1804     PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
1805     PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
1806     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1807     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1808   } else if (cend-cbegin != a->cmat->cmap->N) {
1809     PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
1810     PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
1811     PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
1812     PetscCall(PetscLayoutSetUp(a->cmat->cmap));
1813   }
1814   c = (Mat_MPIDense*)a->cmat->data;
1815   PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1816   PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
1817   a->cmat->preallocated = PETSC_TRUE;
1818   a->cmat->assembled = PETSC_TRUE;
1819   a->matinuse = cbegin + 1;
1820   *v = a->cmat;
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1825 {
1826   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1827   Mat_MPIDense *c;
1828 
1829   PetscFunctionBegin;
1830   PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1831   PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1832   PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1833   a->matinuse = 0;
1834   c    = (Mat_MPIDense*)a->cmat->data;
1835   PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
1836   *v   = NULL;
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 /*MC
1841    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1842 
1843    Options Database Keys:
1844 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1845 
1846   Level: beginner
1847 
1848 .seealso: MatCreateDense()
1849 
1850 M*/
1851 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1852 {
1853   Mat_MPIDense   *a;
1854 
1855   PetscFunctionBegin;
1856   PetscCall(PetscNewLog(mat,&a));
1857   mat->data = (void*)a;
1858   PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));
1859 
1860   mat->insertmode = NOT_SET_VALUES;
1861 
1862   /* build cache for off array entries formed */
1863   a->donotstash = PETSC_FALSE;
1864 
1865   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));
1866 
1867   /* stuff used for matrix vector multiply */
1868   a->lvec        = NULL;
1869   a->Mvctx       = NULL;
1870   a->roworiented = PETSC_TRUE;
1871 
1872   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
1873   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
1874   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
1875   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
1876   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
1877   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
1878   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
1879   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
1880   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
1881   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
1882   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
1883   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
1884   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
1885   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
1886   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
1887   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
1888   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
1889   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
1890   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
1891   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
1892   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
1893 #if defined(PETSC_HAVE_ELEMENTAL)
1894   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
1895 #endif
1896 #if defined(PETSC_HAVE_SCALAPACK)
1897   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
1898 #endif
1899 #if defined(PETSC_HAVE_CUDA)
1900   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
1901 #endif
1902   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
1903   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1904   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1905 #if defined(PETSC_HAVE_CUDA)
1906   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
1907   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
1908 #endif
1909 
1910   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
1911   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
1912   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 /*MC
1917    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1918 
1919    Options Database Keys:
1920 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1921 
1922   Level: beginner
1923 
1924 .seealso: MATMPIDENSE, MATDENSECUDA, MATSEQDENSECUDA
1925 
1926 M*/
1927 #if defined(PETSC_HAVE_CUDA)
1928 #include <petsc/private/deviceimpl.h>
1929 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1930 {
1931   PetscFunctionBegin;
1932   PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
1933   PetscCall(MatCreate_MPIDense(B));
1934   PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
1935   PetscFunctionReturn(0);
1936 }
1937 #endif
1938 
1939 /*MC
1940    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1941 
1942    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1943    and MATMPIDENSE otherwise.
1944 
1945    Options Database Keys:
1946 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1947 
1948   Level: beginner
1949 
1950 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1951 M*/
1952 
1953 /*MC
1954    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1955 
1956    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1957    and MATMPIDENSECUDA otherwise.
1958 
1959    Options Database Keys:
1960 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1961 
1962   Level: beginner
1963 
1964 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1965 M*/
1966 
1967 /*@C
1968    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1969 
1970    Collective
1971 
1972    Input Parameters:
1973 .  B - the matrix
1974 -  data - optional location of matrix data.  Set data=NULL for PETSc
1975    to control all matrix memory allocation.
1976 
1977    Notes:
1978    The dense format is fully compatible with standard Fortran 77
1979    storage by columns.
1980 
1981    The data input variable is intended primarily for Fortran programmers
1982    who wish to allocate their own matrix memory space.  Most users should
1983    set data=NULL.
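
   Example usage (a minimal sketch; comm, m, n, M, and N are placeholders for the communicator and sizes):
.vb
   Mat A;
   PetscCall(MatCreate(comm,&A));
   PetscCall(MatSetSizes(A,m,n,M,N));
   PetscCall(MatSetType(A,MATMPIDENSE));
   PetscCall(MatMPIDenseSetPreallocation(A,NULL));
.ve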
1984 
1985    Level: intermediate
1986 
1987 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1988 @*/
1989 PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1990 {
1991   PetscFunctionBegin;
1992   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1993   PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 /*@
1998    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1999    array provided by the user. This is useful to avoid copying an array
2000    into a matrix.
2001 
2002    Not Collective
2003 
2004    Input Parameters:
2005 +  mat - the matrix
2006 -  array - the array in column major order
2007 
2008    Notes:
2009    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2010    freed when the matrix is destroyed.
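
   Example usage (a minimal sketch; buf is a placeholder for a user-provided column-major array of the right local size):
.vb
   PetscCall(MatDensePlaceArray(mat,buf));
   ... use mat, which now operates directly on buf ...
   PetscCall(MatDenseResetArray(mat));
.ve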
2011 
2012    Level: developer
2013 
2014 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2015 
2016 @*/
2017 PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
2018 {
2019   PetscFunctionBegin;
2020   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2021   PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2022   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2023 #if defined(PETSC_HAVE_CUDA)
2024   mat->offloadmask = PETSC_OFFLOAD_CPU;
2025 #endif
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 /*@
2030    MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
2031 
2032    Not Collective
2033 
2034    Input Parameter:
2035 .  mat - the matrix
2036 
2037    Notes:
2038    You can only call this after a call to MatDensePlaceArray().
2039 
2040    Level: developer
2041 
2042 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2043 
2044 @*/
2045 PetscErrorCode  MatDenseResetArray(Mat mat)
2046 {
2047   PetscFunctionBegin;
2048   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2049   PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
2050   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 /*@
2055    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2056    array provided by the user. This is useful to avoid copying an array
2057    into a matrix.
2058 
2059    Not Collective
2060 
2061    Input Parameters:
2062 +  mat - the matrix
2063 -  array - the array in column major order
2064 
2065    Notes:
2066    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2067    freed by the user. It will be freed when the matrix is destroyed.
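
   Example usage (a minimal sketch; m and N are placeholders for the local row and global column counts):
.vb
   PetscScalar *buf;
   PetscCall(PetscMalloc1(m*N,&buf));
   PetscCall(MatDenseReplaceArray(mat,buf));
   ... buf is now owned by mat and must not be freed by the caller ...
.ve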
2068 
2069    Level: developer
2070 
2071 .seealso: MatDenseGetArray(), VecReplaceArray()
2072 @*/
2073 PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2074 {
2075   PetscFunctionBegin;
2076   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2077   PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2078   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2079 #if defined(PETSC_HAVE_CUDA)
2080   mat->offloadmask = PETSC_OFFLOAD_CPU;
2081 #endif
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #if defined(PETSC_HAVE_CUDA)
2086 /*@C
2087    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2088    array provided by the user. This is useful to avoid copying an array
2089    into a matrix.
2090 
2091    Not Collective
2092 
2093    Input Parameters:
2094 +  mat - the matrix
2095 -  array - the array in column major order
2096 
2097    Notes:
2098    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2099    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
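
   Example usage (a minimal sketch; nbytes is a placeholder for the required allocation size):
.vb
   PetscScalar *d_buf;
   cudaMalloc((void**)&d_buf,nbytes);
   PetscCall(MatDenseCUDAPlaceArray(mat,d_buf));
   ... use mat ...
   PetscCall(MatDenseCUDAResetArray(mat));
   cudaFree(d_buf);
.ve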
2100 
2101    Level: developer
2102 
2103 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2104 @*/
2105 PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2106 {
2107   PetscFunctionBegin;
2108   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2109   PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
2110   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2111   mat->offloadmask = PETSC_OFFLOAD_GPU;
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 /*@C
2116    MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2117 
2118    Not Collective
2119 
2120    Input Parameter:
2121 .  mat - the matrix
2122 
2123    Notes:
2124    You can only call this after a call to MatDenseCUDAPlaceArray().
2125 
2126    Level: developer
2127 
2128 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2129 
2130 @*/
2131 PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
2132 {
2133   PetscFunctionBegin;
2134   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2135   PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
2136   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 /*@C
2141    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2142    array provided by the user. This is useful to avoid copying an array
2143    into a matrix.
2144 
2145    Not Collective
2146 
2147    Input Parameters:
2148 +  mat - the matrix
2149 -  array - the array in column major order
2150 
2151    Notes:
2152    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2153    The memory passed in CANNOT be freed by the user. It will be freed
2154    when the matrix is destroyed. The array should respect the matrix leading dimension.
2155 
2156    Level: developer
2157 
2158 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2159 @*/
2160 PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2161 {
2162   PetscFunctionBegin;
2163   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2164   PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
2165   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
2166   mat->offloadmask = PETSC_OFFLOAD_GPU;
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /*@C
2171    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2172 
2173    Not Collective
2174 
2175    Input Parameter:
2176 .  A - the matrix
2177 
2178    Output Parameter:
2179 .  array - the GPU array in column major order
2180 
2181    Notes:
2182    No copy from the CPU is performed, so the array may not contain up-to-date values if the matrix was modified on the CPU; if you need current data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
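
   Example usage (a minimal sketch):
.vb
   PetscScalar *d_a;
   PetscCall(MatDenseCUDAGetArrayWrite(A,&d_a));
   ... fill d_a on the device, e.g. with a kernel or a cuBLAS call ...
   PetscCall(MatDenseCUDARestoreArrayWrite(A,&d_a));
.ve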
2183 
2184    Level: developer
2185 
2186 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2187 @*/
2188 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2189 {
2190   PetscFunctionBegin;
2191   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2192   PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
2193   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 /*@C
2198    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2199 
2200    Not Collective
2201 
2202    Input Parameters:
2203 +  A - the matrix
2204 -  array - the GPU array in column major order
2205 
2206    Notes:
2207 
2208    After this call the GPU data is considered up to date (the offload mask is set to PETSC_OFFLOAD_GPU), so any CPU copy is treated as stale.
2209 
2210 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2211 @*/
2212 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2213 {
2214   PetscFunctionBegin;
2215   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2216   PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
2217   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2218   A->offloadmask = PETSC_OFFLOAD_GPU;
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 /*@C
2223    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2224 
2225    Not Collective
2226 
2227    Input Parameter:
2228 .  A - the matrix
2229 
2230    Output Parameter:
2231 .  array - the GPU array in column major order
2232 
2233    Notes:
2234    If the matrix was modified on the CPU, this call first copies the up-to-date data to the GPU. If you only need write access, use MatDenseCUDAGetArrayWrite().
2235 
2236    Level: developer
2237 
2238 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2239 @*/
2240 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2241 {
2242   PetscFunctionBegin;
2243   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2244   PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2245   PetscFunctionReturn(0);
2246 }
2247 
2248 /*@C
2249    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2250 
2251    Not Collective
2252 
2253    Input Parameters:
2254 +  A - the matrix
2255 -  array - the GPU array in column major order
2256 
2257    Notes:
2258    The pointer must be the one obtained with MatDenseCUDAGetArrayRead() and must not be used after this call.
2259 
2260    Level: developer
2261 
2262 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2263 @*/
2264 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2265 {
2266   PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2267   PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 /*@C
2272    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2273 
2274    Not Collective
2275 
2276    Input Parameter:
2277 .  A - the matrix
2278 
2279    Output Parameter:
2280 .  array - the GPU array in column major order
2281 
2282    Notes:
2283    If the matrix was modified on the CPU, this call first copies the up-to-date data to the GPU. If you only need write access, use MatDenseCUDAGetArrayWrite(); for read-only access, use MatDenseCUDAGetArrayRead().
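
   Example usage (a minimal sketch):
.vb
   PetscScalar *d_a;
   PetscCall(MatDenseCUDAGetArray(A,&d_a));
   ... read or modify d_a on the device ...
   PetscCall(MatDenseCUDARestoreArray(A,&d_a));
.ve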
2284 
2285    Level: developer
2286 
2287 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2288 @*/
2289 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2290 {
2291   PetscFunctionBegin;
2292   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2293   PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
2294   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 /*@C
2299    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2300 
2301    Not Collective
2302 
2303    Input Parameters:
2304 +  A - the matrix
2305 -  array - the GPU array in column major order
2306 
2307    Notes:
2308    After this call the GPU data is considered up to date (the offload mask is set to PETSC_OFFLOAD_GPU), so any CPU copy is treated as stale.
2309    Level: developer
2310 
2311 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2312 @*/
2313 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2314 {
2315   PetscFunctionBegin;
2316   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
2317   PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2318   PetscCall(PetscObjectStateIncrease((PetscObject)A));
2319   A->offloadmask = PETSC_OFFLOAD_GPU;
2320   PetscFunctionReturn(0);
2321 }
2322 #endif
2323 
2324 /*@C
2325    MatCreateDense - Creates a matrix in dense format.
2326 
2327    Collective
2328 
2329    Input Parameters:
2330 +  comm - MPI communicator
2331 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2332 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2333 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2334 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2335 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2336    to control all matrix memory allocation.
2337 
2338    Output Parameter:
2339 .  A - the matrix
2340 
2341    Notes:
2342    The dense format is fully compatible with standard Fortran 77
2343    storage by columns.
2344    Note that, although local portions of the matrix are stored in column-major
2345    order, the matrix is partitioned across MPI ranks by row.
2346 
2347    The data input variable is intended primarily for Fortran programmers
2348    who wish to allocate their own matrix memory space.  Most users should
2349    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2350 
2351    The user MUST specify either the local or global matrix dimensions
2352    (possibly both).
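
   Example usage (a minimal sketch; M and N are placeholders for the global sizes):
.vb
   Mat A;
   PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
   ... set entries with MatSetValues() ...
   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
.ve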
2353 
2354    Level: intermediate
2355 
2356 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2357 @*/
2358 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2359 {
2360   PetscFunctionBegin;
2361   PetscCall(MatCreate(comm,A));
2362   PetscCall(MatSetSizes(*A,m,n,M,N));
2363   PetscCall(MatSetType(*A,MATDENSE));
2364   PetscCall(MatSeqDenseSetPreallocation(*A,data));
2365   PetscCall(MatMPIDenseSetPreallocation(*A,data));
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #if defined(PETSC_HAVE_CUDA)
2370 /*@C
2371    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2372 
2373    Collective
2374 
2375    Input Parameters:
2376 +  comm - MPI communicator
2377 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2378 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2379 .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
2380 .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
2381 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2382    to control matrix memory allocation.
2383 
2384    Output Parameter:
2385 .  A - the matrix
2386 
2387    Notes:
2388    Most users should set data=NULL and let PETSc control all matrix memory allocation; the data argument is intended for users who manage their own GPU memory.
2389    Level: intermediate
2390 
2391 .seealso: MatCreate(), MatCreateDense()
2392 @*/
2393 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2394 {
2395   PetscFunctionBegin;
2396   PetscCall(MatCreate(comm,A));
2397   PetscValidLogicalCollectiveBool(*A,!!data,6);
2398   PetscCall(MatSetSizes(*A,m,n,M,N));
2399   PetscCall(MatSetType(*A,MATDENSECUDA));
2400   PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
2401   PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
2402   PetscFunctionReturn(0);
2403 }
2404 #endif
2405 
2406 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2407 {
2408   Mat            mat;
2409   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2410 
2411   PetscFunctionBegin;
2412   *newmat = NULL;
2413   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
2414   PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2415   PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
2416   a       = (Mat_MPIDense*)mat->data;
2417 
2418   mat->factortype   = A->factortype;
2419   mat->assembled    = PETSC_TRUE;
2420   mat->preallocated = PETSC_TRUE;
2421 
2422   mat->insertmode = NOT_SET_VALUES;
2423   a->donotstash   = oldmat->donotstash;
2424 
2425   PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
2426   PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));
2427 
2428   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2429   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2430 
2431   *newmat = mat;
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2436 {
2437   PetscBool      isbinary;
2438 #if defined(PETSC_HAVE_HDF5)
2439   PetscBool      ishdf5;
2440 #endif
2441 
2442   PetscFunctionBegin;
2443   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2444   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2445   /* force binary viewer to load .info file if it has not yet done so */
2446   PetscCall(PetscViewerSetUp(viewer));
2447   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2448 #if defined(PETSC_HAVE_HDF5)
2449   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
2450 #endif
2451   if (isbinary) {
2452     PetscCall(MatLoad_Dense_Binary(newMat,viewer));
2453 #if defined(PETSC_HAVE_HDF5)
2454   } else if (ishdf5) {
2455     PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
2456 #endif
2457   } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2462 {
2463   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2464   Mat            a,b;
2465   PetscBool      flg;
2466 
2467   PetscFunctionBegin;
2468   a    = matA->A;
2469   b    = matB->A;
2470   PetscCall(MatEqual(a,b,&flg));
2471   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2476 {
2477   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2478 
2479   PetscFunctionBegin;
2480   PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
2481   PetscCall(MatDestroy(&atb->atb));
2482   PetscCall(PetscFree(atb));
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2487 {
2488   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2489 
2490   PetscFunctionBegin;
2491   PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
2492   PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
2493   PetscCall(PetscFree(abt));
2494   PetscFunctionReturn(0);
2495 }
2496 
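/*
  C = A^T * B for two MPIDense matrices: every rank first forms the full local product
  aseq^T * bseq (a dense matrix of global size N_A x N_B), packs it into sendbuf ordered
  by the owning rank of each block of rows of C, and then a single MPI_Reduce_scatter()
  both sums the per-rank contributions and delivers to each rank exactly the rows of C
  it owns.
*/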
2497 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2498 {
2499   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2500   Mat_TransMatMultDense *atb;
2501   MPI_Comm              comm;
2502   PetscMPIInt           size,*recvcounts;
2503   PetscScalar           *carray,*sendbuf;
2504   const PetscScalar     *atbarray;
2505   PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
2506   const PetscInt        *ranges;
2507 
2508   PetscFunctionBegin;
2509   MatCheckProduct(C,3);
2510   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2511   atb = (Mat_TransMatMultDense *)C->product->data;
2512   recvcounts = atb->recvcounts;
2513   sendbuf = atb->sendbuf;
2514 
2515   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2516   PetscCallMPI(MPI_Comm_size(comm,&size));
2517 
2518   /* compute atbarray = aseq^T * bseq */
2519   PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));
2520 
2521   PetscCall(MatGetOwnershipRanges(C,&ranges));
2522 
2523   /* arrange atbarray into sendbuf */
2524   PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
2525   PetscCall(MatDenseGetLDA(atb->atb,&lda));
2526   for (proc=0, k=0; proc<size; proc++) {
2527     for (j=0; j<cN; j++) {
2528       for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
2529     }
2530   }
2531   PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));
2532 
2533   /* sum all atbarray to local values of C */
2534   PetscCall(MatDenseGetArrayWrite(c->A,&carray));
2535   PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
2536   PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
2537   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2538   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2543 {
2544   MPI_Comm              comm;
2545   PetscMPIInt           size;
2546   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2547   Mat_TransMatMultDense *atb;
2548   PetscBool             cisdense;
2549   PetscInt              i;
2550   const PetscInt        *ranges;
2551 
2552   PetscFunctionBegin;
2553   MatCheckProduct(C,3);
2554   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2555   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2556   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2557     SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2558   }
2559 
2560   /* create matrix product C */
2561   PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
2562   PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
2563   if (!cisdense) {
2564     PetscCall(MatSetType(C,((PetscObject)A)->type_name));
2565   }
2566   PetscCall(MatSetUp(C));
2567 
2568   /* create data structure for reuse C */
2569   PetscCallMPI(MPI_Comm_size(comm,&size));
2570   PetscCall(PetscNew(&atb));
2571   cM   = C->rmap->N;
2572   PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
2573   PetscCall(MatGetOwnershipRanges(C,&ranges));
2574   for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2575 
2576   C->product->data    = atb;
2577   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2578   PetscFunctionReturn(0);
2579 }
2580 
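/*
  Symbolic phase of C = A * B^T: selects one of two communication schemes based on the
  product algorithm, "allgatherv" (alg 0, every rank gathers all of B) or "cyclic"
  (alg 1, row blocks of B rotate around the ranks in a ring), and allocates the buffers
  each scheme needs.
*/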
2581 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2582 {
2583   MPI_Comm              comm;
2584   PetscMPIInt           i, size;
2585   PetscInt              maxRows, bufsiz;
2586   PetscMPIInt           tag;
2587   PetscInt              alg;
2588   Mat_MatTransMultDense *abt;
2589   Mat_Product           *product = C->product;
2590   PetscBool             flg;
2591 
2592   PetscFunctionBegin;
2593   MatCheckProduct(C,4);
2594   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2595   /* check local size of A and B */
2596   PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);
2597 
2598   PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
2599   alg  = flg ? 0 : 1;
2600 
2601   /* setup matrix product C */
2602   PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
2603   PetscCall(MatSetType(C,MATMPIDENSE));
2604   PetscCall(MatSetUp(C));
2605   PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));
2606 
2607   /* create data structure for reuse of C */
2608   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2609   PetscCallMPI(MPI_Comm_size(comm,&size));
2610   PetscCall(PetscNew(&abt));
2611   abt->tag = tag;
2612   abt->alg = alg;
2613   switch (alg) {
2614   case 1: /* alg: "cyclic" */
2615     for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2616     bufsiz = A->cmap->N * maxRows;
2617     PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
2618     break;
2619   default: /* alg: "allgatherv" */
2620     PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
2621     PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
2622     for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2623     for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2624     break;
2625   }
2626 
2627   C->product->data    = abt;
2628   C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2629   C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2630   PetscFunctionReturn(0);
2631 }
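
/*
   The two ABt algorithms trade memory for communication: "cyclic" keeps two
   buffers sized N_A * (largest local row count of B) and shifts them around a
   ring, whereas "allgatherv" replicates all of B^T on every process.
   Illustrative selection sketch through the MatProduct API (hypothetical caller;
   the matching runtime options are registered further below in
   MatProductSetFromOptions_MPIDense_ABt):

     PetscCall(MatProductCreate(A,B,NULL,&C));
     PetscCall(MatProductSetType(C,MATPRODUCT_ABt));
     PetscCall(MatProductSetAlgorithm(C,"cyclic"));  // or "allgatherv" (the default)
     PetscCall(MatProductSetFromOptions(C));
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
*/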
2632 
2633 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2634 {
2635   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2636   Mat_MatTransMultDense *abt;
2637   MPI_Comm              comm;
2638   PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2639   PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
2640   PetscInt              i,cK=A->cmap->N,k,j,bn;
2641   PetscScalar           _DOne=1.0,_DZero=0.0;
2642   const PetscScalar     *av,*bv;
2643   PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
2644   MPI_Request           reqs[2];
2645   const PetscInt        *ranges;
2646 
2647   PetscFunctionBegin;
2648   MatCheckProduct(C,3);
2649   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2650   abt  = (Mat_MatTransMultDense*)C->product->data;
2651   PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
2652   PetscCallMPI(MPI_Comm_rank(comm,&rank));
2653   PetscCallMPI(MPI_Comm_size(comm,&size));
2654   PetscCall(MatDenseGetArrayRead(a->A,&av));
2655   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2656   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2657   PetscCall(MatDenseGetLDA(a->A,&i));
2658   PetscCall(PetscBLASIntCast(i,&alda));
2659   PetscCall(MatDenseGetLDA(b->A,&i));
2660   PetscCall(PetscBLASIntCast(i,&blda));
2661   PetscCall(MatDenseGetLDA(c->A,&i));
2662   PetscCall(PetscBLASIntCast(i,&clda));
2663   PetscCall(MatGetOwnershipRanges(B,&ranges));
2664   bn   = B->rmap->n;
2665   if (blda == bn) {
2666     sendbuf = (PetscScalar*)bv;
2667   } else {
2668     sendbuf = abt->buf[0];
2669     for (k = 0, i = 0; i < cK; i++) {
2670       for (j = 0; j < bn; j++, k++) {
2671         sendbuf[k] = bv[i * blda + j];
2672       }
2673     }
2674   }
2675   if (size > 1) {
2676     sendto = (rank + size - 1) % size;
2677     recvfrom = (rank + size + 1) % size;
2678   } else {
2679     sendto = recvfrom = 0;
2680   }
2681   PetscCall(PetscBLASIntCast(cK,&ck));
2682   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2683   recvisfrom = rank;
2684   for (i = 0; i < size; i++) {
2685     /* we have finished receiving and sending, bufs can be read/modified */
2686     PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* process from which the next recvbuf originates */
2687     PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2688 
2689     if (nextrecvisfrom != rank) {
2690       /* start the cyclic shift: send from sendbuf, receive into recvbuf (which becomes the next sendbuf) */
2691       sendsiz = cK * bn;
2692       recvsiz = cK * nextbn;
2693       recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2694       PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
2695       PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
2696     }
2697 
2698     /* local product: (sequential part of A) * sendbuf^T */
2699     PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
2700     if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2701 
2702     if (nextrecvisfrom != rank) {
2703       /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2704       PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
2705     }
2706     bn = nextbn;
2707     recvisfrom = nextrecvisfrom;
2708     sendbuf = recvbuf;
2709   }
2710   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2711   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2712   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2713   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2714   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2715   PetscFunctionReturn(0);
2716 }
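
/*
   The ring schedule above, reduced to a standalone plain-MPI sketch: each of the
   'size' steps overlaps one nonblocking shift of the packed B^T block with the
   local GEMM against the block that is already resident.

     int sendto   = (rank + size - 1) % size;  // blocks move toward lower ranks
     int recvfrom = (rank + 1) % size;
     for (int step = 0; step < size; step++) {
       MPI_Request reqs[2];
       int more = (step + 1 < size);           // the last resident block needs no shift
       if (more) {
         MPI_Isend(sendbuf,sendsiz,MPI_DOUBLE,sendto,tag,comm,&reqs[0]);
         MPI_Irecv(recvbuf,recvsiz,MPI_DOUBLE,recvfrom,tag,comm,&reqs[1]);
       }
       // ... GEMM with the block currently held in sendbuf ...
       if (more) MPI_Waitall(2,reqs,MPI_STATUSES_IGNORE);
       // swap sendbuf and recvbuf, then continue
     }
*/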
2717 
2718 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2719 {
2720   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2721   Mat_MatTransMultDense *abt;
2722   MPI_Comm              comm;
2723   PetscMPIInt           size;
2724   PetscScalar           *cv, *sendbuf, *recvbuf;
2725   const PetscScalar     *av,*bv;
2726   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2727   PetscScalar           _DOne=1.0,_DZero=0.0;
2728   PetscBLASInt          cm, cn, ck, alda, clda;
2729 
2730   PetscFunctionBegin;
2731   MatCheckProduct(C,3);
2732   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2733   abt  = (Mat_MatTransMultDense*)C->product->data;
2734   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
2735   PetscCallMPI(MPI_Comm_size(comm,&size));
2736   PetscCall(MatDenseGetArrayRead(a->A,&av));
2737   PetscCall(MatDenseGetArrayRead(b->A,&bv));
2738   PetscCall(MatDenseGetArrayWrite(c->A,&cv));
2739   PetscCall(MatDenseGetLDA(a->A,&i));
2740   PetscCall(PetscBLASIntCast(i,&alda));
2741   PetscCall(MatDenseGetLDA(b->A,&blda));
2742   PetscCall(MatDenseGetLDA(c->A,&i));
2743   PetscCall(PetscBLASIntCast(i,&clda));
2744   /* copy transpose of B into buf[0] */
2745   bn      = B->rmap->n;
2746   sendbuf = abt->buf[0];
2747   recvbuf = abt->buf[1];
2748   for (k = 0, j = 0; j < bn; j++) {
2749     for (i = 0; i < cK; i++, k++) {
2750       sendbuf[k] = bv[i * blda + j];
2751     }
2752   }
2753   PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
2754   PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
2755   PetscCall(PetscBLASIntCast(cK,&ck));
2756   PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
2757   PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
2758   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2759   PetscCall(MatDenseRestoreArrayRead(a->A,&av));
2761   PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
2762   PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
2763   PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
2764   PetscFunctionReturn(0);
2765 }
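
/*
   The allgatherv variant above, as a standalone plain-MPI sketch: every process
   packs its local rows of B transposed (column-major, leading dimension
   K = A->cmap->N), gathers the complete K x N_B matrix B^T, then finishes with a
   single local GEMM.

     // recvcounts[p] = (rows of B owned by p) * K; recvdispls built from the ranges
     MPI_Allgatherv(sendbuf,bn*K,MPI_DOUBLE,recvbuf,recvcounts,recvdispls,MPI_DOUBLE,comm);
     // C_loc = A_loc * recvbuf, reading recvbuf as a K x N_B matrix with ldb = K
*/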
2766 
2767 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2768 {
2769   Mat_MatTransMultDense *abt;
2770 
2771   PetscFunctionBegin;
2772   MatCheckProduct(C,3);
2773   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2774   abt = (Mat_MatTransMultDense*)C->product->data;
2775   switch (abt->alg) {
2776   case 1:
2777     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
2778     break;
2779   default:
2780     PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
2781     break;
2782   }
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2787 {
2788   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2789 
2790   PetscFunctionBegin;
2791   PetscCall(MatDestroy(&ab->Ce));
2792   PetscCall(MatDestroy(&ab->Ae));
2793   PetscCall(MatDestroy(&ab->Be));
2794   PetscCall(PetscFree(ab));
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 #if defined(PETSC_HAVE_ELEMENTAL)
2799 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2800 {
2801   Mat_MatMultDense *ab;
2802 
2803   PetscFunctionBegin;
2804   MatCheckProduct(C,3);
2805   PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2806   ab   = (Mat_MatMultDense*)C->product->data;
2807   PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae));
2808   PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be));
2809   PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce));
2810   PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C));
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2815 {
2816   Mat              Ae,Be,Ce;
2817   Mat_MatMultDense *ab;
2818 
2819   PetscFunctionBegin;
2820   MatCheckProduct(C,4);
2821   PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2822   /* check local size of A and B */
2823   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2824     SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
2825   }
2826 
2827   /* create elemental matrices Ae and Be */
2828   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae));
2829   PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
2830   PetscCall(MatSetType(Ae,MATELEMENTAL));
2831   PetscCall(MatSetUp(Ae));
2832   PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE));
2833 
2834   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be));
2835   PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N));
2836   PetscCall(MatSetType(Be,MATELEMENTAL));
2837   PetscCall(MatSetUp(Be));
2838   PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE));
2839 
2840   /* compute symbolic Ce = Ae*Be */
2841   PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce));
2842   PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce));
2843 
2844   /* setup C */
2845   PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE));
2846   PetscCall(MatSetType(C,MATDENSE));
2847   PetscCall(MatSetUp(C));
2848 
2849   /* create data structure for reuse of C */
2850   PetscCall(PetscNew(&ab));
2851   ab->Ae = Ae;
2852   ab->Be = Be;
2853   ab->Ce = Ce;
2854 
2855   C->product->data    = ab;
2856   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2857   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2858   PetscFunctionReturn(0);
2859 }
2860 #endif
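
/*
   Illustrative caller-side sketch for the Elemental-backed AB product above
   (requires a PETSc build configured with Elemental, e.g. --download-elemental);
   the conversion round trip is internal, so a plain MatMatMult suffices.

     Mat A,B,C;
     // ... assemble A (m x k) and B (k x n) as MATMPIDENSE ...
     PetscCall(MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     PetscCall(MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));  // reuses Ae, Be, Ce
*/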
2861 /* ----------------------------------------------- */
2862 #if defined(PETSC_HAVE_ELEMENTAL)
2863 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2864 {
2865   PetscFunctionBegin;
2866   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2867   C->ops->productsymbolic = MatProductSymbolic_AB;
2868   PetscFunctionReturn(0);
2869 }
2870 #endif
2871 
2872 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2873 {
2874   Mat_Product *product = C->product;
2875   Mat         A = product->A,B=product->B;
2876 
2877   PetscFunctionBegin;
2878   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2879     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ", %" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2880   C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2881   C->ops->productsymbolic = MatProductSymbolic_AtB;
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2886 {
2887   Mat_Product    *product = C->product;
2888   const char     *algTypes[2] = {"allgatherv","cyclic"};
2889   PetscInt       alg,nalg = 2;
2890   PetscBool      flg = PETSC_FALSE;
2891 
2892   PetscFunctionBegin;
2893   /* Set default algorithm */
2894   alg = 0; /* default is allgatherv */
2895   PetscCall(PetscStrcmp(product->alg,"default",&flg));
2896   if (flg) {
2897     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2898   }
2899 
2900   /* Get runtime option */
2901   if (product->api_user) {
2902     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2903     PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
2904     PetscOptionsEnd();
2905   } else {
2906     PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2907     PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
2908     PetscOptionsEnd();
2909   }
2910   if (flg) {
2911     PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2912   }
2913 
2914   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2915   C->ops->productsymbolic          = MatProductSymbolic_ABt;
2916   PetscFunctionReturn(0);
2917 }
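
/*
   Hypothetical command lines exercising the two option paths registered above:

     # when reached through the MatMatTransposeMult() API (api_user is true)
     ./app -matmattransmult_mpidense_mpidense_via cyclic
     # when reached through the MatProduct API
     ./app -mat_product_algorithm cyclic
*/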
2918 
2919 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2920 {
2921   Mat_Product    *product = C->product;
2922 
2923   PetscFunctionBegin;
2924   switch (product->type) {
2925 #if defined(PETSC_HAVE_ELEMENTAL)
2926   case MATPRODUCT_AB:
2927     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2928     break;
2929 #endif
2930   case MATPRODUCT_AtB:
2931     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2932     break;
2933   case MATPRODUCT_ABt:
2934     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2935     break;
2936   default:
2937     break;
2938   }
2939   PetscFunctionReturn(0);
2940 }
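
/*
   Summary of the dense-dense product paths wired up by the dispatcher above:
   MATPRODUCT_AB goes through the Elemental round trip (only in Elemental-enabled
   builds), MATPRODUCT_AtB through the Reduce_scatter kernel, and MATPRODUCT_ABt
   through the allgatherv (default) or cyclic kernels; any other product type
   falls through untouched.
*/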
2941