xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 117ef88edefbfc12e7c19efe87a19a2e1b0acd4f)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Return the square diagonal block of A owned by this process.  The block is
   created on first call, cached on A under the key "DiagonalBlock", and
   aliases A's local storage (no copy of the values is made). */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* reuse the cached block if a previous call already built it */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B points directly at column rstart of the local array.
       NOTE(review): the offset m*rstart assumes the local leading dimension
       equals m — confirm for matrices with a non-default lda */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* the composition takes its own reference, so drop ours; the cached
       object stays alive as long as A does */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Insert or add a logically dense block of values.  Rows owned by this
   process go directly into the inner sequential matrix; off-process rows are
   buffered in the stash and communicated during assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i of the input occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: entry (i,j) lives at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue; /* negative column indices are silently ignored */
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: stash it until MatAssemblyBegin/End communicates it */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
166 {
167   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168   PetscErrorCode ierr;
169 
170   PetscFunctionBegin;
171   ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174 
175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
176 {
177   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
186 {
187   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
196 {
197   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
198   PetscErrorCode ierr;
199 
200   PetscFunctionBegin;
201   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
206 {
207   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
212   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
213   PetscFunctionReturn(0);
214 }
215 
216 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
217 {
218   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
219   PetscErrorCode ierr;
220 
221   PetscFunctionBegin;
222   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
223   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
228 {
229   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
230   PetscErrorCode ierr;
231 
232   PetscFunctionBegin;
233   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
234   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
/* Extract the submatrix A(isrow, iscol).  The column selection is gathered
   onto every process (each process stores full rows), then values are copied
   column by column out of the local array. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process keeps its full rows, so each needs the complete column selection */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): irow[j]-rstart assumes every index in isrow is locally
     owned, and the sequential bv writes assume the target local array is
     contiguous (its lda equals nrows) — confirm for MAT_REUSE_MATRIX */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of selected source column (column-major) */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
307 
308 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
309 {
310   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
311   PetscErrorCode ierr;
312 
313   PetscFunctionBegin;
314   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
319 {
320   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
321   PetscErrorCode ierr;
322 
323   PetscFunctionBegin;
324   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
329 {
330   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
331   PetscErrorCode ierr;
332 
333   PetscFunctionBegin;
334   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
339 {
340   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
341   PetscErrorCode ierr;
342   PetscInt       nstash,reallocs;
343 
344   PetscFunctionBegin;
345   if (mdn->donotstash || mat->nooffprocentries) return(0);
346 
347   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
348   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
349   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
/* Finish assembly: drain stashed off-process entries into the local matrix,
   assemble the inner sequential matrix, and on the first final assembly set
   up the scatter infrastructure used by the multiply routines. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      /* flg == 0 means no more incoming stash messages */
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first-ever final assembly: build the SF/scatter used by MatMult et al. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
391 
392 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
393 {
394   PetscErrorCode ierr;
395   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
396 
397   PetscFunctionBegin;
398   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
399   PetscFunctionReturn(0);
400 }
401 
/* Zero the listed (globally numbered) rows; optionally put diag on the
   diagonal and adjust the right-hand side b so x keeps its prescribed
   values in those rows. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    /* b_row = diag * x_row for every zeroed, locally owned row */
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local matrix with a 0 diagonal ... */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  /* ... then place diag on the diagonal.
     NOTE(review): MatDiagonalSet with a constant vector rewrites the ENTIRE
     diagonal, not only the zeroed rows — confirm this is intended */
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
434 
435 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
436 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
437 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
438 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
439 
440 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
441 {
442   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
443   PetscErrorCode    ierr;
444   const PetscScalar *ax;
445   PetscScalar       *ay;
446 
447   PetscFunctionBegin;
448   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
449   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
450   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
451   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
452   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
453   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
454   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
455   PetscFunctionReturn(0);
456 }
457 
458 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
459 {
460   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
461   PetscErrorCode    ierr;
462   const PetscScalar *ax;
463   PetscScalar       *ay;
464 
465   PetscFunctionBegin;
466   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
467   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
468   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
469   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
470   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
471   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
472   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
/* yy = A^T*xx: local transpose multiply into the work vector, then a
   summed reduction scatters the partial results back onto yy */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* yy accumulates contributions from all processes, so start from zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  /* sum the per-process partial products into the owning processes */
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
494 
/* zz = yy + A^T*xx: like MatMultTranspose but the reduction accumulates on
   top of a copy of yy */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* seed zz with yy so the summed reduction below adds A^T*xx onto it */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  /* sum the per-process partial products into the owning processes */
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
513 
514 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
515 {
516   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
517   PetscErrorCode    ierr;
518   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
519   PetscScalar       *x,zero = 0.0;
520   const PetscScalar *av;
521 
522   PetscFunctionBegin;
523   ierr = VecSet(v,zero);CHKERRQ(ierr);
524   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
525   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
526   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
527   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
528   radd = A->rmap->rstart*m;
529   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
530   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
531   for (i=0; i<len; i++) {
532     x[i] = av[radd + i*lda + i];
533   }
534   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
535   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
/* Destroy the parallel dense matrix: free the stash, the inner sequential
   matrix, the communication objects, and unregister every composed method. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* release owned sub-objects */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove all type-specific methods composed at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
588 
589 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
590 
591 #include <petscdraw.h>
/* View the matrix to an ASCII, draw, or socket viewer.  INFO formats are
   handled in place; otherwise the whole matrix is gathered onto rank 0 and
   viewed through the sequential dense viewer. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); /* NOTE(review): vtype is fetched but never used */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary only, no matrix entries */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* nothing to do for a null draw context */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 holds all M rows; every other rank holds none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* rank 0 views its (now complete) local block through a sub-viewer */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
673 
674 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
675 {
676   PetscErrorCode ierr;
677   PetscBool      iascii,isbinary,isdraw,issocket;
678 
679   PetscFunctionBegin;
680   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
681   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
682   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
683   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
684 
685   if (iascii || issocket || isdraw) {
686     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
687   } else if (isbinary) {
688     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
689   }
690   PetscFunctionReturn(0);
691 }
692 
/* Gather matrix statistics: local numbers come from the inner sequential
   matrix; for MAT_GLOBAL_MAX/MAT_GLOBAL_SUM they are reduced over the
   matrix communicator. */
PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  /* pack the five reducible fields into a send buffer */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
735 
/* Handle MatSetOption: forward relevant options to the inner sequential
   matrix, record the stash/orientation flags locally, and ignore options
   that have no meaning for dense storage. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the local matrix */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* affects how MatSetValues interprets the input array, both here and locally */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    /* meaningless for dense storage */
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* off-process entries will be dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* structural hints not exploited by this implementation */
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
776 
777 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
778 {
779   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
780   const PetscScalar *l;
781   PetscScalar       x,*v,*vv,*r;
782   PetscErrorCode    ierr;
783   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
784 
785   PetscFunctionBegin;
786   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
787   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
788   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
789   if (ll) {
790     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
791     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
792     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
793     for (i=0; i<m; i++) {
794       x = l[i];
795       v = vv + i;
796       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
797     }
798     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
799     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
800   }
801   if (rr) {
802     const PetscScalar *ar;
803 
804     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
805     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
806     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
807     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
808     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
809     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
810     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
811     for (i=0; i<n; i++) {
812       x = r[i];
813       v = vv + i*lda;
814       for (j=0; j<m; j++) (*v++) *= x;
815     }
816     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
817     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
818   }
819   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
/*
   MatNorm_MPIDense - Computes a matrix norm of a parallel dense matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row norm); the two-norm raises PETSC_ERR_SUP.  On a single process
   the computation is delegated entirely to the sequential block.

   NOTE(review): the Frobenius and 1-norm paths iterate the raw array as if
   the local block were stored contiguously (lda == local row count) --
   confirm callers can never reach here with a larger leading dimension.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  if (mdn->size == 1) {
    /* single-process case: the local block is the whole matrix */
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 locally, then reduce and take the square root */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* per-column absolute sums: local partial sums, global sum, then max */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local, so the sequential result only needs a max-reduction */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
869 
/*
   MatTranspose_MPIDense - Forms the transpose of a parallel dense matrix.

   Each process scatters the columns of its local block into the rows of the
   result via MatSetValues (which stashes off-process entries), then assembles.
   For MAT_INPLACE_MATRIX a temporary matrix is built and merged back into A
   with MatHeaderMerge; for the other modes the result is returned in *matout.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  /* in-place also needs a fresh B: it is merged into A's header at the end */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* global column indices of B that receive local row i of A */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of A (contiguous, length m) becomes (part of) global row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's data with B's, destroying B's shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
907 
908 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
909 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
910 
911 PetscErrorCode MatSetUp_MPIDense(Mat A)
912 {
913   PetscErrorCode ierr;
914 
915   PetscFunctionBegin;
916   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
917   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
918   if (!A->preallocated) {
919     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
920   }
921   PetscFunctionReturn(0);
922 }
923 
924 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
925 {
926   PetscErrorCode ierr;
927   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
928 
929   PetscFunctionBegin;
930   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
931   PetscFunctionReturn(0);
932 }
933 
934 PetscErrorCode MatConjugate_MPIDense(Mat mat)
935 {
936   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
937   PetscErrorCode ierr;
938 
939   PetscFunctionBegin;
940   ierr = MatConjugate(a->A);CHKERRQ(ierr);
941   PetscFunctionReturn(0);
942 }
943 
944 PetscErrorCode MatRealPart_MPIDense(Mat A)
945 {
946   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
947   PetscErrorCode ierr;
948 
949   PetscFunctionBegin;
950   ierr = MatRealPart(a->A);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
955 {
956   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
965 {
966   PetscErrorCode ierr;
967   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
968 
969   PetscFunctionBegin;
970   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
971   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
972   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
977 
978 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
979 {
980   PetscErrorCode ierr;
981   PetscInt       i,n;
982   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
983   PetscReal      *work;
984 
985   PetscFunctionBegin;
986   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
987   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
988   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
989   if (type == NORM_2) {
990     for (i=0; i<n; i++) work[i] *= work[i];
991   }
992   if (type == NORM_INFINITY) {
993     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
994   } else {
995     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
996   }
997   ierr = PetscFree(work);CHKERRQ(ierr);
998   if (type == NORM_2) {
999     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #if defined(PETSC_HAVE_CUDA)
/* Exposes column col of the matrix as an MPI CUDA vector without copying:
   the device array pointer, offset to the column, is placed into a lazily
   created work vector.  The matrix is marked busy via a->vecinuse until the
   matching MatDenseRestoreColumnVec call. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* created with a NULL array; the real array is placed below on every access */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1023 
/* Releases the column vector obtained with MatDenseGetColumnVec: restores the
   matrix device array, detaches it from the work vector, and clears the
   busy flag.  *v is nulled so the caller cannot keep a stale handle. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1038 
/* Read-only variant of MatDenseGetColumnVec: same zero-copy column access,
   but uses the read-only device array and read-locks the returned vector. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* created with a NULL array; the real array is placed below on every access */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1058 
/* Releases a column vector obtained with MatDenseGetColumnVecRead: restores
   the read-only device array, pops the read lock, detaches the array, and
   clears the busy flag. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1074 
/* Write-only variant of MatDenseGetColumnVec: same zero-copy column access,
   but obtains the device array with write-only semantics (no host-to-device
   sync of prior contents is promised). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* created with a NULL array; the real array is placed below on every access */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so column 0 still registers as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1093 
/* Releases a column vector obtained with MatDenseGetColumnVecWrite: restores
   the write-only device array, detaches it from the work vector, and clears
   the busy flag. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1108 
1109 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1110 {
1111   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1116   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1121 {
1122   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1127   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1132 {
1133   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1138   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1143 {
1144   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1145   PetscErrorCode ierr;
1146 
1147   PetscFunctionBegin;
1148   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1153 {
1154   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1163 {
1164   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1173 {
1174   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1183 {
1184   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1185   PetscErrorCode ierr;
1186 
1187   PetscFunctionBegin;
1188   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1193 {
1194   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1203 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1204 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1205 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1206 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1207 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1208 
/* Binds the matrix to the CPU (or releases it back to the GPU): propagates the
   binding to the local block and swaps the composed column-vector accessors
   between the CUDA and host implementations to match. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop a host-type work vector so the CUDA getters recreate a CUDA one */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* bound to CPU: route column-vector access through the host implementations */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1243 
/* Preallocates the storage of a MATMPIDENSECUDA matrix, optionally adopting a
   user-supplied device array d_data (NULL lets PETSc allocate it).  Silently
   does nothing for matrices of any other type.  Sets up the layouts, creates
   the local sequential CUDA block on first call, and marks A preallocated. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block: all N columns of the local rows, on this process only */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1266 #endif
1267 
1268 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1269 {
1270   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
/* Dense matrices store every entry, so a diagonal entry can never be missing;
   *d is left untouched since no missing location exists to report. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1284 
1285 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1286 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1287 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1288 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1289 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1290 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1291 
1292 /* -------------------------------------------------------------------*/
/* Dispatch table installed on every MATMPIDENSE matrix.  The position of each
   entry is the contract: index comments give the slot number in struct _MatOps,
   and a 0 entry means the operation is not supported by this type.  Do not
   reorder. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1442 
/* Type-specific implementation of MatMPIDenseSetPreallocation: sets up the
   layouts, lazily creates the local sequential block (host or CUDA flavor to
   match the parallel type), and hands the optional user array `data` to the
   sequential preallocation.  NULL data lets PETSc allocate the storage. */
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* local block: all N columns of the local rows, on this process only */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1463 
1464 #if defined(PETSC_HAVE_ELEMENTAL)
/* Converts a MATMPIDENSE matrix to MATELEMENTAL.  Each process inserts its
   local rows (all N global columns) into the Elemental matrix; for
   MAT_INPLACE_MATRIX the result replaces A's header via MatHeaderReplace. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    /* entries are accumulated with ADD_VALUES below, so start from zero */
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* local dense storage is column-major, hence column-oriented insertion */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1503 #endif
1504 
/* Type-specific MatDenseGetColumn(): returns a pointer to the storage of global column
   "col" by forwarding to the inner sequential dense block (which holds all columns for
   the local rows). Must be matched by MatDenseRestoreColumn_MPIDense(). */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1514 
/* Type-specific MatDenseRestoreColumn(): releases a column pointer previously obtained
   with MatDenseGetColumn_MPIDense() by forwarding to the inner sequential block. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1524 
/* Builds (MAT_INITIAL_MATRIX) or refills (reuse) a parallel dense matrix whose local
   block is a copy of the per-process sequential matrix inmat; n is the requested local
   column count (PETSC_DECIDE lets PETSc split the N global columns). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /* NOTE(review): nloc is fetched but never used below — confirm it can be removed */
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase: copy inmat into the inner sequential block of *outmat */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1554 
1555 #if defined(PETSC_HAVE_CUDA)
/* Converts a MATMPIDENSECUDA matrix back to the host MATMPIDENSE type: binds the
   matrix to the CPU, resets the default vector type to VECSTANDARD, removes all the
   CUDA-specific composed methods, and converts the inner sequential block. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat is M itself and is modified in place */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* clear the CUDA-only composed methods registered by the forward conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential block to host and rebuild multiply scatter */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1597 
/* Converts a host MATMPIDENSE matrix to MATMPIDENSECUDA: switches the default vector
   type to VECCUDA, registers the CUDA-specific composed methods, converts the inner
   sequential block to MATSEQDENSECUDA, and unbinds the matrix from the CPU. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* for MAT_INPLACE_MATRIX, *newmat is M itself and is modified in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* register the CUDA-specific methods; the reverse conversion removes them */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    /* no inner block yet (matrix not preallocated) */
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1642 #endif
1643 
/* Exposes column "col" of the parallel dense matrix as a Vec that aliases (no copy)
   the column storage of the inner sequential block. Read-write access: the inner
   array is checked out with MatDenseGetArray(). Only one column may be out at a time;
   must be matched by MatDenseRestoreColumnVec_MPIDense(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* col+1 so that 0 can mean "no column checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* columns are stored lda apart (column-major) */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1662 
/* Returns the column Vec checked out by MatDenseGetColumnVec_MPIDense(): restores the
   inner array, detaches it from the wrapper vector, and clears the in-use flag. */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL; /* caller's reference is invalidated */
  PetscFunctionReturn(0);
}
1677 
/* Read-only variant of MatDenseGetColumnVec_MPIDense(): checks out the inner array
   with MatDenseGetArrayRead() and read-locks the wrapper vector so callers cannot
   modify it. Must be matched by MatDenseRestoreColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* col+1 so that 0 can mean "no column checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1697 
/* Returns the read-only column Vec checked out by MatDenseGetColumnVecRead_MPIDense():
   restores the read-only inner array, pops the read lock, detaches the array from the
   wrapper vector, and clears the in-use flag. */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL; /* caller's reference is invalidated */
  PetscFunctionReturn(0);
}
1713 
/* Write-only variant of MatDenseGetColumnVec_MPIDense(): checks out the inner array
   with MatDenseGetArrayWrite() (previous contents need not be valid). Must be matched
   by MatDenseRestoreColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* col+1 so that 0 can mean "no column checked out" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1732 
/* Returns the write-only column Vec checked out by MatDenseGetColumnVecWrite_MPIDense():
   restores the inner array, detaches it from the wrapper vector, and clears the flag. */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL; /* caller's reference is invalidated */
  PetscFunctionReturn(0);
}
1747 
/* Constructor for the MATMPIDENSE type: allocates the type-specific data structure,
   installs the function table, creates the value stash for off-process insertions, and
   registers all composed methods (dense accessors, conversions, products). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* dense array/column accessors (dispatched through PetscUseMethod/PetscTryMethod) */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  /* conversions available only when the corresponding package is configured */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1808 
1809 /*MC
1810    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1811 
1812    Options Database Keys:
1813 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1814 
1815   Level: beginner
1816 
.seealso: MATDENSE, MATDENSECUDA, MATMPIDENSE, MATSEQDENSECUDA
1818 
1819 M*/
1820 #if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: builds a host MPIDENSE matrix first, then
   converts it in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1830 #endif
1831 
1832 /*MC
1833    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1834 
1835    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1836    and MATMPIDENSE otherwise.
1837 
1838    Options Database Keys:
1839 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1840 
1841   Level: beginner
1842 
1843 
1844 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1845 M*/
1846 
1847 /*MC
1848    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1849 
1850    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1851    and MATMPIDENSECUDA otherwise.
1852 
1853    Options Database Keys:
1854 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1855 
1856   Level: beginner
1857 
1858 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1859 M*/
1860 
1861 /*@C
1862    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1863 
1864    Not collective
1865 
1866    Input Parameters:
+  B - the matrix
1868 -  data - optional location of matrix data.  Set data=NULL for PETSc
1869    to control all matrix memory allocation.
1870 
1871    Notes:
1872    The dense format is fully compatible with standard Fortran 77
1873    storage by columns.
1874 
1875    The data input variable is intended primarily for Fortran programmers
1876    who wish to allocate their own matrix memory space.  Most users should
1877    set data=NULL.
1878 
1879    Level: intermediate
1880 
1881 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1882 @*/
/* Public entry point; dispatches to the type-specific implementation (a no-op on
   types that do not compose "MatMPIDenseSetPreallocation_C"). See manual page above. */
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1892 
1893 /*@
1894    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1895    array provided by the user. This is useful to avoid copying an array
1896    into a matrix
1897 
1898    Not Collective
1899 
1900    Input Parameters:
1901 +  mat - the matrix
1902 -  array - the array in column major order
1903 
1904    Notes:
1905    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1906    freed when the matrix is destroyed.
1907 
1908    Level: developer
1909 
1910 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1911 
1912 @*/
/* Public entry point; dispatches to the type-specific implementation (errors on types
   that do not compose "MatDensePlaceArray_C"). See manual page above. */
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1926 
1927 /*@
1928    MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
1929 
1930    Not Collective
1931 
1932    Input Parameters:
1933 .  mat - the matrix
1934 
1935    Notes:
1936    You can only call this after a call to MatDensePlaceArray()
1937 
1938    Level: developer
1939 
1940 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1941 
1942 @*/
/* Public entry point; dispatches to the type-specific implementation (errors on types
   that do not compose "MatDenseResetArray_C"). See manual page above. */
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1953 
1954 /*@
1955    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1956    array provided by the user. This is useful to avoid copying an array
1957    into a matrix
1958 
1959    Not Collective
1960 
1961    Input Parameters:
1962 +  mat - the matrix
1963 -  array - the array in column major order
1964 
1965    Notes:
1966    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
1967    freed by the user. It will be freed when the matrix is destroyed.
1968 
1969    Level: developer
1970 
1971 .seealso: MatDenseGetArray(), VecReplaceArray()
1972 @*/
/* Public entry point; dispatches to the type-specific implementation (errors on types
   that do not compose "MatDenseReplaceArray_C"). Unlike MatDensePlaceArray(), the matrix
   takes ownership of "array". See manual page above. */
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the valid copy is now the CPU one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1986 
1987 #if defined(PETSC_HAVE_CUDA)
1988 /*@C
1989    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
1990    array provided by the user. This is useful to avoid copying an array
1991    into a matrix
1992 
1993    Not Collective
1994 
1995    Input Parameters:
1996 +  mat - the matrix
1997 -  array - the array in column major order
1998 
1999    Notes:
2000    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2001    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2002 
2003    Level: developer
2004 
2005 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2006 @*/
/* Public entry point; dispatches to the CUDA-type implementation (errors on types that
   do not compose "MatDenseCUDAPlaceArray_C"). See manual page above. */
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives on the device, so the valid copy is now the GPU one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2018 
2019 /*@C
2020    MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2021 
2022    Not Collective
2023 
2024    Input Parameters:
2025 .  mat - the matrix
2026 
2027    Notes:
2028    You can only call this after a call to MatDenseCUDAPlaceArray()
2029 
2030    Level: developer
2031 
2032 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2033 
2034 @*/
/* Public entry point; dispatches to the CUDA-type implementation (errors on types that
   do not compose "MatDenseCUDAResetArray_C"). See manual page above. */
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2045 
2046 /*@C
2047    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2048    array provided by the user. This is useful to avoid copying an array
2049    into a matrix
2050 
2051    Not Collective
2052 
2053    Input Parameters:
2054 +  mat - the matrix
2055 -  array - the array in column major order
2056 
2057    Notes:
2058    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2059    The memory passed in CANNOT be freed by the user. It will be freed
2060    when the matrix is destroyed. The array should respect the matrix leading dimension.
2061 
2062    Level: developer
2063 
2064 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2065 @*/
/* Public entry point; dispatches to the CUDA-type implementation (errors on types that
   do not compose "MatDenseCUDAReplaceArray_C"). The matrix takes ownership of "array".
   See manual page above. */
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement array lives on the device, so the valid copy is now the GPU one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2077 
2078 /*@C
2079    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2080 
2081    Not Collective
2082 
2083    Input Parameters:
2084 .  A - the matrix
2085 
   Output Parameter:
2087 .  array - the GPU array in column major order
2088 
2089    Notes:
2090    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2091 
2092    Level: developer
2093 
2094 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2095 @*/
/* Public entry point for write-only device-array access; dispatches to the CUDA-type
   implementation. The state increase marks the matrix as modified. See manual page above. */
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2106 
2107 /*@C
2108    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2109 
2110    Not Collective
2111 
2112    Input Parameters:
2113 +  A - the matrix
2114 -  array - the GPU array in column major order
2115 
2116    Notes:
2117 
2118    Level: developer
2119 
2120 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2121 @*/
/* Public entry point matching MatDenseCUDAGetArrayWrite(); dispatches to the CUDA-type
   implementation and marks the GPU copy as the valid one. See manual page above. */
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2133 
2134 /*@C
2135    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2136 
2137    Not Collective
2138 
2139    Input Parameters:
2140 .  A - the matrix
2141 
   Output Parameter:
2143 .  array - the GPU array in column major order
2144 
2145    Notes:
2146    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2147 
2148    Level: developer
2149 
2150 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2151 @*/
/* Public entry point for read-only device-array access; dispatches to the CUDA-type
   implementation. No state increase: the matrix is not modified. See manual page above. */
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2161 
2162 /*@C
2163    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2164 
2165    Not Collective
2166 
2167    Input Parameters:
2168 +  A - the matrix
2169 -  array - the GPU array in column major order
2170 
2171    Notes:
2172    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2173 
2174    Level: developer
2175 
2176 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2177 @*/
2178 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2179 {
2180   PetscErrorCode ierr;
2181 
2182   PetscFunctionBegin;
2183   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read-write access: the caller may modify entries, so invalidate cached state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2215 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   Since the access granted by MatDenseCUDAGetArray() is read-write, this routine bumps the
   object state and marks the up-to-date copy of the data as being on the GPU.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the GPU copy may have been modified while access was held */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2242 #endif
2243 
2244 /*@C
2245    MatCreateDense - Creates a matrix in dense format.
2246 
2247    Collective
2248 
2249    Input Parameters:
2250 +  comm - MPI communicator
2251 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2252 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2253 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2254 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2255 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2256    to control all matrix memory allocation.
2257 
2258    Output Parameter:
2259 .  A - the matrix
2260 
2261    Notes:
2262    The dense format is fully compatible with standard Fortran 77
2263    storage by columns.
2264 
2265    The data input variable is intended primarily for Fortran programmers
2266    who wish to allocate their own matrix memory space.  Most users should
2267    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2268 
2269    The user MUST specify either the local or global matrix dimensions
2270    (possibly both).
2271 
2272    Level: intermediate
2273 
2274 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2275 @*/
2276 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2277 {
2278   PetscErrorCode ierr;
2279   PetscMPIInt    size;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2283   PetscValidLogicalCollectiveBool(*A,!!data,6);
2284   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2286   if (size > 1) {
2287     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2288     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2289     if (data) {  /* user provided data array, so no need to assemble */
2290       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2291       (*A)->assembled = PETSC_TRUE;
2292     }
2293   } else {
2294     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2295     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2296   }
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   If data is provided it is presumably a device pointer in column major order
   (mirroring MatCreateDense()) -- confirm against the type-specific preallocation routines.

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* all ranks must agree on whether a user buffer is supplied */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2347 #endif
2348 
2349 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2350 {
2351   Mat            mat;
2352   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2353   PetscErrorCode ierr;
2354 
2355   PetscFunctionBegin;
2356   *newmat = 0;
2357   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2358   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2359   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2360   a       = (Mat_MPIDense*)mat->data;
2361 
2362   mat->factortype   = A->factortype;
2363   mat->assembled    = PETSC_TRUE;
2364   mat->preallocated = PETSC_TRUE;
2365 
2366   a->size         = oldmat->size;
2367   a->rank         = oldmat->rank;
2368   mat->insertmode = NOT_SET_VALUES;
2369   a->donotstash   = oldmat->donotstash;
2370 
2371   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2372   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2373 
2374   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2375   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2376   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2377 
2378   *newmat = mat;
2379   PetscFunctionReturn(0);
2380 }
2381 
/* Load an MPI dense matrix from a viewer; binary and (when configured) HDF5
   viewers are supported, anything else is an error. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2408 
2409 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2410 {
2411   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2412   Mat            a,b;
2413   PetscBool      flg;
2414   PetscErrorCode ierr;
2415 
2416   PetscFunctionBegin;
2417   a    = matA->A;
2418   b    = matB->A;
2419   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2420   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2425 {
2426   PetscErrorCode        ierr;
2427   Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2428 
2429   PetscFunctionBegin;
2430   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2431   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2432   ierr = PetscFree(atb);CHKERRQ(ierr);
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2437 {
2438   PetscErrorCode        ierr;
2439   Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2443   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2444   ierr = PetscFree(abt);CHKERRQ(ierr);
2445   PetscFunctionReturn(0);
2446 }
2447 
/* Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank computes the full (global-rows x local-cols-of-B) product of its
   local blocks, then the per-rank partial products are summed and scattered
   so every rank ends up with its owned rows of C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* recvcounts[i] = number of entries of C owned by rank i (rows owned * global cols) */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: for each destination rank, its row block
     in column-major order. NOTE(review): the indexing atbarray[i+j*cM]
     assumes the local dense product has leading dimension cM -- confirm. */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C: reduce the partial products across
     ranks and scatter each rank its owned rows in one collective call */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2491 
2492 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2493 {
2494   PetscErrorCode        ierr;
2495   MPI_Comm              comm;
2496   PetscMPIInt           size;
2497   PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
2498   Mat_TransMatMultDense *atb;
2499   PetscBool             cisdense;
2500 
2501   PetscFunctionBegin;
2502   MatCheckProduct(C,3);
2503   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2504   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2505   if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2506     SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2507   }
2508 
2509   /* create matrix product C */
2510   ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
2511   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
2512   if (!cisdense) {
2513     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2514   }
2515   ierr = MatSetUp(C);CHKERRQ(ierr);
2516   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2517   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2518 
2519   /* create data structure for reuse C */
2520   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2521   ierr = PetscNew(&atb);CHKERRQ(ierr);
2522   cM   = C->rmap->N;
2523   ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
2524 
2525   C->product->data    = atb;
2526   C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2527   PetscFunctionReturn(0);
2528 }
2529 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sets up C and the
   buffers for whichever algorithm ("cyclic" or "allgatherv") was selected.
   fill is unused (dense result). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: A*B^T contracts over columns */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  /* alg 0 = "allgatherv", anything else (here 1) = "cyclic" */
  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C (rows of A x rows of B) */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" -- double buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" -- buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    /* displacement/count tables in scalars; A->cmap->N equals B->cmap->N by the check above */
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2584 
/* Numeric phase of C = A * B^T, "cyclic" algorithm: each rank's packed local
   B block circulates around the ring of ranks; at every step the local block
   of A is multiplied (via GEMM) against the block currently held, filling the
   corresponding column strip of C. Communication of the next block overlaps
   with the local GEMM. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the three local blocks, as BLAS integers */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B is contiguous: circulate it directly, no packing needed */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack local B into a contiguous buffer (strip the LDA padding) */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank; /* first step multiplies against our own block */
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate the double buffers */
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the column strip of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2668 
2669 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2670 {
2671   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2672   Mat_MatTransMultDense *abt;
2673   PetscErrorCode        ierr;
2674   MPI_Comm              comm;
2675   PetscMPIInt           size;
2676   PetscScalar           *cv, *sendbuf, *recvbuf;
2677   const PetscScalar     *av,*bv;
2678   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2679   PetscScalar           _DOne=1.0,_DZero=0.0;
2680   PetscBLASInt          cm, cn, ck, alda, clda;
2681 
2682   PetscFunctionBegin;
2683   MatCheckProduct(C,3);
2684   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2685   abt  = (Mat_MatTransMultDense*)C->product->data;
2686   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2688   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2689   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2690   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2691   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2692   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2693   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2694   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2695   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2696   /* copy transpose of B into buf[0] */
2697   bn      = B->rmap->n;
2698   sendbuf = abt->buf[0];
2699   recvbuf = abt->buf[1];
2700   for (k = 0, j = 0; j < bn; j++) {
2701     for (i = 0; i < cK; i++, k++) {
2702       sendbuf[k] = bv[i * blda + j];
2703     }
2704   }
2705   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2706   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2707   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2708   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2709   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2710   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2711   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2712   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2713   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2718 {
2719   Mat_MatTransMultDense *abt;
2720   PetscErrorCode        ierr;
2721 
2722   PetscFunctionBegin;
2723   MatCheckProduct(C,3);
2724   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2725   abt = (Mat_MatTransMultDense*)C->product->data;
2726   switch (abt->alg) {
2727   case 1:
2728     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2729     break;
2730   default:
2731     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2732     break;
2733   }
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2738 {
2739   PetscErrorCode   ierr;
2740   Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2744   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2745   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2746   ierr = PetscFree(ab);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B for MPI dense matrices via Elemental:
   refresh the cached Elemental copies of A and B, multiply there, and
   convert the result back into C in place. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  /* reuse the Elemental matrices created in the symbolic phase */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2766 
2767 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2768 {
2769   PetscErrorCode   ierr;
2770   Mat              Ae,Be,Ce;
2771   Mat_MatMultDense *ab;
2772 
2773   PetscFunctionBegin;
2774   MatCheckProduct(C,4);
2775   if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2776   /* check local size of A and B */
2777   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2778     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2779   }
2780 
2781   /* create elemental matrices Ae and Be */
2782   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2783   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2784   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2785   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2786   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2787 
2788   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2789   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2790   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2791   ierr = MatSetUp(Be);CHKERRQ(ierr);
2792   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2793 
2794   /* compute symbolic Ce = Ae*Be */
2795   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2796   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2797 
2798   /* setup C */
2799   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2800   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2801   ierr = MatSetUp(C);CHKERRQ(ierr);
2802 
2803   /* create data structure for reuse Cdense */
2804   ierr = PetscNew(&ab);CHKERRQ(ierr);
2805   ab->Ae = Ae;
2806   ab->Be = Be;
2807   ab->Ce = Ce;
2808 
2809   C->product->data    = ab;
2810   C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2811   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2812   PetscFunctionReturn(0);
2813 }
2814 #endif
2815 /* ----------------------------------------------- */
2816 #if defined(PETSC_HAVE_ELEMENTAL)
/* Wire up the AB product: the symbolic routine round-trips through Elemental
   (only compiled when PETSC_HAVE_ELEMENTAL is defined). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
2824 #endif
2825 
/* Wire up the A^T*B product after checking that A and B share a row layout,
   which the symbolic/numeric routines rely on. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}
2838 
/* Wire up the A*B^T product: choose between the "allgatherv" (default) and
   "cyclic" algorithms, optionally overridden from the options database. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on which API the user called */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2873 
/* Top-level product dispatcher for MPI dense matrices. AB is available only
   when PETSc is configured with Elemental; unhandled product types fall
   through silently so a later generic path can reject them. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
2897