xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 50ce3c9cd8f4988a3cbb79efa54bcc80be0ac0c0)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Returns the diagonal block of the parallel dense matrix as a sequential dense
   matrix that ALIASES (does not copy) the local storage.  The block is cached on
   A under the name "DiagonalBlock" so repeated calls return the same object.
   Requires congruent row/column layouts (checked below). */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      cong;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if (!cong) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* Reuse a previously created diagonal block if one is already attached to A */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) {
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArray(mdn->A,&array);CHKERRQ(ierr);
    /* B shares storage with the local array: the diagonal block starts at column rstart.
       NOTE(review): the offset m*rstart assumes the local leading dimension equals m
       (column-major, lda == m) -- confirm against MatDenseGetLDA for this block. */
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(mdn->A,&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* Compose takes a reference on B; drop the local one so A owns the cache */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
95 
/* Insert or add values into the parallel dense matrix.  Rows owned locally go
   straight into the local sequential block; rows owned elsewhere are stashed
   for communication during assembly (unless stashing is disabled).
   Negative row/column indices are silently skipped. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      /* Locally owned row: translate to the local row index */
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is stored row by row: row i occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is stored column by column: entry (i,j) is v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* Off-process row: queue it in the stash; assembly will route it to its owner */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
129 
130 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
131 {
132   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
133   PetscErrorCode ierr;
134   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
135 
136   PetscFunctionBegin;
137   for (i=0; i<m; i++) {
138     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
139     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
140     if (idxm[i] >= rstart && idxm[i] < rend) {
141       row = idxm[i] - rstart;
142       for (j=0; j<n; j++) {
143         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
144         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
145         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
146       }
147     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
148   }
149   PetscFunctionReturn(0);
150 }
151 
152 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
153 {
154   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
155   PetscErrorCode ierr;
156 
157   PetscFunctionBegin;
158   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
159   PetscFunctionReturn(0);
160 }
161 
162 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
163 {
164   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
165   PetscErrorCode ierr;
166 
167   PetscFunctionBegin;
168   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
169   PetscFunctionReturn(0);
170 }
171 
172 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
173 {
174   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
175   PetscErrorCode ierr;
176 
177   PetscFunctionBegin;
178   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
183 {
184   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
185   PetscErrorCode ierr;
186 
187   PetscFunctionBegin;
188   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
189   PetscFunctionReturn(0);
190 }
191 
192 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
193 {
194   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
195   PetscErrorCode ierr;
196 
197   PetscFunctionBegin;
198   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
199   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 
203 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
204 {
205   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
206   PetscErrorCode ierr;
207 
208   PetscFunctionBegin;
209   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
210   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
215 {
216   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
217   PetscErrorCode ierr;
218 
219   PetscFunctionBegin;
220   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
221   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
/* Extract the submatrix A(isrow,iscol) into a new (or reused) MPIDENSE matrix.
   isrow must select only locally owned rows on each process (no row
   redistribution); iscol is gathered so every process sees all columns. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* Every process needs the full column selection to fill its local rows */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     Source is column-major with leading dimension lda; destination is written
     contiguously (its rows count equals nrows and columns are filled in order). */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];  /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];           /* irow holds global indices; shift to local */
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
294 
295 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
296 {
297   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
298   PetscErrorCode ierr;
299 
300   PetscFunctionBegin;
301   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
305 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
306 {
307   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
308   PetscErrorCode ierr;
309 
310   PetscFunctionBegin;
311   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
312   PetscFunctionReturn(0);
313 }
314 
315 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
316 {
317   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
328   PetscErrorCode ierr;
329   PetscInt       nstash,reallocs;
330 
331   PetscFunctionBegin;
332   if (mdn->donotstash || mat->nooffprocentries) return(0);
333 
334   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
335   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
336   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
/* Finish assembly: drain the stash messages started by MatAssemblyBegin,
   insert the received values locally, assemble the local block, and (on the
   first final assembly) build the communication structures for MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;  /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  /* Assemble the local sequential block */
  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* First final assembly: set up the scatter (lvec/Mvctx) used by MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
378 
379 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
380 {
381   PetscErrorCode ierr;
382   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
383 
384   PetscFunctionBegin;
385   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
/* Zero the listed (global) rows, optionally placing `diag` on the diagonal and
   adjusting the right-hand side so that b_i = diag * x_i for each zeroed row.
   Each process handles only the rows it owns. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* Zero local rows with zero diagonal; the requested diagonal is inserted below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this sets the WHOLE diagonal to diag, not just the zeroed
       rows -- dense rows were fully cleared, so on zeroed rows this is the
       intended value; other diagonal entries are overwritten too. Confirm
       callers rely on MatZeroRows semantics only for the listed rows. */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
421 
422 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
423 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
424 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
425 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
426 
/* yy = A*xx.  Broadcast the distributed input into the local work vector
   lvec via the star-forest Mvctx, then apply the local dense block. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* Gather all needed entries of xx into lvec (begin/end must stay paired) */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  /* Local product: yy = A_loc * lvec */
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
444 
/* zz = yy + A*xx.  Same communication pattern as MatMult_MPIDense, then a
   local multiply-add on the sequential block. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  /* Gather all needed entries of xx into lvec (begin/end must stay paired) */
  ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  /* Local multiply-add: zz = yy + A_loc * lvec */
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
462 
/* yy = A^T * xx.  Apply the local transpose into the work vector, then SUM
   the per-process partial results into the distributed output via the
   star-forest reduction (the reverse of the MatMult broadcast). */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* yy accumulates contributions from all ranks, so it must start at zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
481 
/* zz = yy + A^T * xx.  Seed zz with yy, then reduce the per-process partial
   transpose products into it (sum), mirroring MatMultTranspose_MPIDense. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;

  PetscFunctionBegin;
  /* Start from yy so the reduction adds on top of it */
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
500 
501 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
502 {
503   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
504   PetscErrorCode    ierr;
505   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
506   PetscScalar       *x,zero = 0.0;
507   const PetscScalar *av;
508 
509   PetscFunctionBegin;
510   ierr = VecSet(v,zero);CHKERRQ(ierr);
511   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
512   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
513   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
514   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
515   radd = A->rmap->rstart*m;
516   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
517   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
518   for (i=0; i<len; i++) {
519     x[i] = av[radd + i*lda + i];
520   }
521   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
522   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
/* Destroy the parallel dense matrix: free the stash, the local block, the
   communication work vector/SF, the type-specific data, and unregister every
   composed method so a subsequent MatSetType starts clean. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* Release the assembly stash and all type-specific resources */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* Unregister every method this type composed on the object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
576 
577 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
578 
579 #include <petscdraw.h>
580 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
581 {
582   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
583   PetscErrorCode    ierr;
584   PetscMPIInt       rank = mdn->rank;
585   PetscViewerType   vtype;
586   PetscBool         iascii,isdraw;
587   PetscViewer       sviewer;
588   PetscViewerFormat format;
589 
590   PetscFunctionBegin;
591   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
593   if (iascii) {
594     ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
595     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
597       MatInfo info;
598       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
599       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
600       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
601       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
602       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
603       ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
604       PetscFunctionReturn(0);
605     } else if (format == PETSC_VIEWER_ASCII_INFO) {
606       PetscFunctionReturn(0);
607     }
608   } else if (isdraw) {
609     PetscDraw draw;
610     PetscBool isnull;
611 
612     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
613     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
614     if (isnull) PetscFunctionReturn(0);
615   }
616 
617   {
618     /* assemble the entire matrix onto first processor. */
619     Mat         A;
620     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
621     PetscInt    *cols;
622     PetscScalar *vals;
623 
624     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
625     if (!rank) {
626       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
627     } else {
628       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
629     }
630     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
631     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
632     ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
633     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
634 
635     /* Copy the matrix ... This isn't the most efficient means,
636        but it's quick for now */
637     A->insertmode = INSERT_VALUES;
638 
639     row = mat->rmap->rstart;
640     m   = mdn->A->rmap->n;
641     for (i=0; i<m; i++) {
642       ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
643       ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
644       ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
645       row++;
646     }
647 
648     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
649     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
650     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
651     if (!rank) {
652       ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653       ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
654     }
655     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
656     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
657     ierr = MatDestroy(&A);CHKERRQ(ierr);
658   }
659   PetscFunctionReturn(0);
660 }
661 
662 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
663 {
664   PetscErrorCode ierr;
665   PetscBool      iascii,isbinary,isdraw,issocket;
666 
667   PetscFunctionBegin;
668   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
669   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
670   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
671   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
672 
673   if (iascii || issocket || isdraw) {
674     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
675   } else if (isbinary) {
676     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
677   }
678   PetscFunctionReturn(0);
679 }
680 
681 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
682 {
683   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
684   Mat            mdn  = mat->A;
685   PetscErrorCode ierr;
686   PetscLogDouble isend[5],irecv[5];
687 
688   PetscFunctionBegin;
689   info->block_size = 1.0;
690 
691   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
692 
693   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
694   isend[3] = info->memory;  isend[4] = info->mallocs;
695   if (flag == MAT_LOCAL) {
696     info->nz_used      = isend[0];
697     info->nz_allocated = isend[1];
698     info->nz_unneeded  = isend[2];
699     info->memory       = isend[3];
700     info->mallocs      = isend[4];
701   } else if (flag == MAT_GLOBAL_MAX) {
702     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
703 
704     info->nz_used      = irecv[0];
705     info->nz_allocated = irecv[1];
706     info->nz_unneeded  = irecv[2];
707     info->memory       = irecv[3];
708     info->mallocs      = irecv[4];
709   } else if (flag == MAT_GLOBAL_SUM) {
710     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
711 
712     info->nz_used      = irecv[0];
713     info->nz_allocated = irecv[1];
714     info->nz_unneeded  = irecv[2];
715     info->memory       = irecv[3];
716     info->mallocs      = irecv[4];
717   }
718   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
719   info->fill_ratio_needed = 0;
720   info->factor_mallocs    = 0;
721   PetscFunctionReturn(0);
722 }
723 
/* Handle matrix options: forward the ones the local dense block understands,
   record stash/orientation flags locally, and log-and-ignore options that do
   not apply to dense storage. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* Options forwarded to the local sequential block */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  /* Row orientation affects both this level (SetValues layout) and the block */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  /* Meaningless for dense storage: every entry is already allocated */
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  /* Disables stashing of off-process entries in MatSetValues */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Structural hints with no effect on dense matrices */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
764 
/*
   A = diag(ll)*A*diag(rr), performed in place on the local block.

   ll (optional) conforms to A's row layout, so the left scaling needs no
   communication: local row i is scaled by ll[i].
   rr (optional) conforms to A's column layout; its entries are first gathered
   into the work vector mdn->lvec through the mdn->Mvctx star forest, after
   which every locally stored column is scaled.
*/
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;  /* first entry of local row i; stepping by lda walks along the row */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    /* NOTE(review): %d is used for PetscInt here while the check above uses %D -- confirm */
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* gather the distributed entries of rr needed for the locally stored columns */
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;  /* column i is contiguous in the column-major local array */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
810 
/*
   Computes a norm of the parallel dense matrix.

   NORM_FROBENIUS: each rank sums |a_ij|^2 over its local entries, the partial
                   sums are added across ranks, then the square root is taken.
   NORM_1:         local column sums of absolute values are summed across ranks
                   and the maximum over all global columns is taken.
   NORM_INFINITY:  each rank takes the norm of its local rows and the maximum
                   across ranks is taken.
   NORM_2 is not supported.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  if (mdn->size == 1) {
    /* uniprocessor: delegate entirely to the local sequential block */
    ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): this walks cmap->n*rmap->n contiguous entries, i.e. it
         assumes the local array has no padding (lda == local row count) -- confirm */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp[j] = local contribution to the absolute column sum of column j */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      /* the 1-norm is the largest of the global column sums */
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
857 
/*
   Forms the transpose of the parallel dense matrix A.

   reuse == MAT_INITIAL_MATRIX: a new matrix with transposed sizes is created.
   reuse == MAT_REUSE_MATRIX:   *matout is overwritten.
   reuse == MAT_INPLACE_MATRIX: a temporary is built and merged back into A.

   Each rank inserts its local rows into the target with MatSetValues: local
   row i corresponds to global column rstart+i of the transpose, and local
   column j (the inner block stores all global columns, see the preallocation
   routine) becomes global row j of the transpose.  Off-process values are
   moved by the assembly.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the target with row/column layouts swapped relative to A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;   /* global column indices in B for our local rows */
  for (j=0; j<n; j++) {
    /* column j of the local block becomes (part of) global row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's internals with B's while keeping A's header */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
895 
896 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
897 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
898 
899 PetscErrorCode MatSetUp_MPIDense(Mat A)
900 {
901   PetscErrorCode ierr;
902 
903   PetscFunctionBegin;
904   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
905   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
906   if (!A->preallocated) {
907     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
908   }
909   PetscFunctionReturn(0);
910 }
911 
912 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
913 {
914   PetscErrorCode ierr;
915   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
916 
917   PetscFunctionBegin;
918   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
919   PetscFunctionReturn(0);
920 }
921 
922 PetscErrorCode MatConjugate_MPIDense(Mat mat)
923 {
924   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
925   PetscErrorCode ierr;
926 
927   PetscFunctionBegin;
928   ierr = MatConjugate(a->A);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
931 
932 PetscErrorCode MatRealPart_MPIDense(Mat A)
933 {
934   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
935   PetscErrorCode ierr;
936 
937   PetscFunctionBegin;
938   ierr = MatRealPart(a->A);CHKERRQ(ierr);
939   PetscFunctionReturn(0);
940 }
941 
942 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
943 {
944   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
945   PetscErrorCode ierr;
946 
947   PetscFunctionBegin;
948   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
953 {
954   PetscErrorCode ierr;
955   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
956 
957   PetscFunctionBegin;
958   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
959   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
960   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
965 
966 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
967 {
968   PetscErrorCode ierr;
969   PetscInt       i,n;
970   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
971   PetscReal      *work;
972 
973   PetscFunctionBegin;
974   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
975   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
976   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
977   if (type == NORM_2) {
978     for (i=0; i<n; i++) work[i] *= work[i];
979   }
980   if (type == NORM_INFINITY) {
981     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
982   } else {
983     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
984   }
985   ierr = PetscFree(work);CHKERRQ(ierr);
986   if (type == NORM_2) {
987     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #if defined(PETSC_HAVE_CUDA)
/* Exposes global column col of A as an MPI CUDA vector that aliases A's
   device storage (read/write access).  Must be matched by a call to the
   corresponding restore routine; only one column may be borrowed at a time. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector, with no array attached yet */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* stored +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* point the vector at the start of column col in the lda-strided array */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1011 
/* Returns the column vector borrowed with MatDenseGetColumnVec: releases the
   device array and detaches the aliased pointer from the wrapper vector. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);  /* drop the borrowed device pointer */
  *v   = NULL;
  PetscFunctionReturn(0);
}
1026 
/* Read-only variant of MatDenseGetColumnVec: same aliasing scheme, but the
   device array is acquired read-only and the vector is read-locked. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector, with no array attached yet */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* stored +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  /* lock after placing the array so callers cannot write through the vector */
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1046 
/* Returns a column vector borrowed with MatDenseGetColumnVecRead: restores
   the read-only device array, pops the read lock, and detaches the pointer. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);      /* undo the lock taken in the get routine */
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);   /* drop the borrowed device pointer */
  *v   = NULL;
  PetscFunctionReturn(0);
}
1062 
/* Write-only variant of MatDenseGetColumnVec: the device array is acquired
   in write mode (prior contents need not be valid). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) { /* lazily create the wrapper vector, with no array attached yet */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* stored +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1081 
/* Returns a column vector borrowed with MatDenseGetColumnVecWrite: restores
   the write-mode device array and detaches the aliased pointer. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);  /* drop the borrowed device pointer */
  *v   = NULL;
  PetscFunctionReturn(0);
}
1096 
1097 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1098 {
1099   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1104   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1109 {
1110   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1115   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1120 {
1121   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1126   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1131 {
1132   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1133   PetscErrorCode ierr;
1134 
1135   PetscFunctionBegin;
1136   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1141 {
1142   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1143   PetscErrorCode ierr;
1144 
1145   PetscFunctionBegin;
1146   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1151 {
1152   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1161 {
1162   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1163   PetscErrorCode ierr;
1164 
1165   PetscFunctionBegin;
1166   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1171 {
1172   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1181 {
1182   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1183   PetscErrorCode ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1191 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1192 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1193 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1194 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1195 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1196 
/*
   Binds the matrix to host (bind == PETSC_TRUE) or device execution.

   Rebinds the local sequential block, records the binding on the header,
   then re-composes the MatDenseGetColumnVec family so subsequent calls
   dispatch to the CUDA or the host implementations, matching the binding.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* device binding requires the cached column vector to be a CUDA vector;
       destroy a mismatched one so it is recreated lazily with the right type */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1231 
1232 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1233 {
1234   Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
1235   PetscErrorCode ierr;
1236   PetscBool      iscuda;
1237 
1238   PetscFunctionBegin;
1239   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1240   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1241   if (!iscuda) PetscFunctionReturn(0);
1242   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1243   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1244   if (!d->A) {
1245     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1246     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1247     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1248   }
1249   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1250   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1251   A->preallocated = PETSC_TRUE;
1252   PetscFunctionReturn(0);
1253 }
1254 #endif
1255 
1256 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1257 {
1258   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1259   PetscErrorCode ierr;
1260 
1261   PetscFunctionBegin;
1262   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1267 
/* A dense matrix stores every entry, so no diagonal element can be missing;
   the optional index output d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1274 
1275 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1276 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1277 
1278 /* -------------------------------------------------------------------*/
/* Function dispatch table for MATMPIDENSE.  Slot positions follow the field
   order of struct _MatOps (the numbered comments mark every fifth slot); a 0
   entry means the operation is not implemented for this matrix type. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1428 
/*
   Preallocates the local sequential block a->A of an MPIDENSE matrix.

   data - optional array for the local block's storage, passed through to
          MatSeqDenseSetPreallocation; NULL lets PETSc allocate it.

   The local block holds this rank's rows and ALL global columns; its type
   (SEQDENSE vs SEQDENSECUDA) is chosen to match the parallel matrix type.
*/
PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* local rows x all N columns */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1449 
1450 #if defined(PETSC_HAVE_ELEMENTAL)
/*
   Converts an MPIDENSE matrix to MATELEMENTAL.

   Each rank inserts its local rows (all N global columns per row) into the
   Elemental matrix with a single MatSetValues call; assembly redistributes
   them into Elemental's cyclic layout.  With MAT_INPLACE_MATRIX the result
   replaces A's internals via MatHeaderReplace.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* column-oriented insertion matches the dense column-major array below */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* rows = this rank's global row indices; cols = all global column indices */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1489 #endif
1490 
1491 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1492 {
1493   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1494   PetscErrorCode ierr;
1495 
1496   PetscFunctionBegin;
1497   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1502 {
1503   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1504   PetscErrorCode ierr;
1505 
1506   PetscFunctionBegin;
1507   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1508   PetscFunctionReturn(0);
1509 }
1510 
/*
   Builds a parallel dense matrix whose local rows are taken from the sequential
   matrix inmat on each process (symbolic creation with MAT_INITIAL_MATRIX, value
   copy always).  Collective on comm.
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  /* NOTE(review): nloc is fetched but never used afterwards — confirm whether it can be removed */
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1540 
1541 #if defined(PETSC_HAVE_CUDA)
/*
   Converts a MATMPIDENSECUDA matrix back to plain MATMPIDENSE: copies/duplicates
   the values as requested, strips the CUDA-specific composed methods, and
   converts the inner sequential matrix back to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  /* for MAT_INPLACE_MATRIX, *newmat is M itself and is modified in place below */
  B    = *newmat;
  /* make sure data lives on the host before demoting the type */
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove the CUDA-only composed methods installed by the forward conversion */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* demote the inner local matrix and rebuild the mat-vec scatter */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1581 
/*
   Converts a MATMPIDENSE matrix to MATMPIDENSECUDA: installs the CUDA-specific
   composed methods, switches the default vector type to VECCUDA, and converts
   the inner sequential matrix to MATSEQDENSECUDA.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  /* for MAT_INPLACE_MATRIX, *newmat is M itself and is modified in place below */
  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install the CUDA-specific methods (removed again by the reverse conversion) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",            MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                        MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                    MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                   MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                    MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",               MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                      MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                      MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                    MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* promote the inner local matrix and rebuild the mat-vec scatter */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1624 #endif
1625 
1626 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1627 {
1628   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1629   PetscErrorCode ierr;
1630   PetscInt       lda;
1631 
1632   PetscFunctionBegin;
1633   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1634   if (!a->cvec) {
1635     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1636   }
1637   a->vecinuse = col + 1;
1638   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1639   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1640   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1641   *v   = a->cvec;
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1646 {
1647   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1648   PetscErrorCode ierr;
1649 
1650   PetscFunctionBegin;
1651   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1652   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1653   a->vecinuse = 0;
1654   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1655   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1656   *v   = NULL;
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1661 {
1662   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1663   PetscErrorCode ierr;
1664   PetscInt       lda;
1665 
1666   PetscFunctionBegin;
1667   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1668   if (!a->cvec) {
1669     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1670   }
1671   a->vecinuse = col + 1;
1672   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1673   ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1674   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1675   ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1676   *v   = a->cvec;
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1681 {
1682   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1687   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1688   a->vecinuse = 0;
1689   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1690   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1691   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1692   *v   = NULL;
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1697 {
1698   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1699   PetscErrorCode ierr;
1700   PetscInt       lda;
1701 
1702   PetscFunctionBegin;
1703   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1704   if (!a->cvec) {
1705     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1706   }
1707   a->vecinuse = col + 1;
1708   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1709   ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1710   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1711   *v   = a->cvec;
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1716 {
1717   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1722   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1723   a->vecinuse = 0;
1724   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1725   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1726   *v   = NULL;
1727   PetscFunctionReturn(0);
1728 }
1729 
/*
   Type constructor for MATMPIDENSE: allocates the implementation data,
   installs the function table, creates the stash used for off-process
   MatSetValues(), and composes the type-specific methods.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  /* install the MATMPIDENSE virtual function table */
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* compose the dense-matrix query/access methods dispatched by the generic API */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1791 
1792 /*MC
1793    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1794 
1795    Options Database Keys:
1796 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1797 
1798   Level: beginner
1799 
1800 .seealso:
1801 
1802 M*/
1803 #if defined(PETSC_HAVE_CUDA)
/*
   Type constructor for MATMPIDENSECUDA: builds a MATMPIDENSE matrix and then
   converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1813 #endif
1814 
1815 /*MC
1816    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1817 
1818    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1819    and MATMPIDENSE otherwise.
1820 
1821    Options Database Keys:
1822 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1823 
1824   Level: beginner
1825 
1826 
1827 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1828 M*/
1829 
1830 /*MC
1831    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1832 
1833    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1834    and MATMPIDENSECUDA otherwise.
1835 
1836    Options Database Keys:
1837 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1838 
1839   Level: beginner
1840 
1841 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1842 M*/
1843 
1844 /*@C
1845    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1846 
1847    Not collective
1848 
1849    Input Parameters:
+  B - the matrix
1851 -  data - optional location of matrix data.  Set data=NULL for PETSc
1852    to control all matrix memory allocation.
1853 
1854    Notes:
1855    The dense format is fully compatible with standard Fortran 77
1856    storage by columns.
1857 
1858    The data input variable is intended primarily for Fortran programmers
1859    who wish to allocate their own matrix memory space.  Most users should
1860    set data=NULL.
1861 
1862    Level: intermediate
1863 
1864 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1865 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: silently does nothing if B's type does not implement the method */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1875 
1876 /*@
1877    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1878    array provided by the user. This is useful to avoid copying an array
1879    into a matrix
1880 
1881    Not Collective
1882 
1883    Input Parameters:
1884 +  mat - the matrix
1885 -  array - the array in column major order
1886 
1887    Notes:
1888    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1889    freed when the matrix is destroyed.
1890 
1891    Level: developer
1892 
1893 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1894 
1895 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the placed array lives on the host, so the host copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1909 
1910 /*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
1912 
1913    Not Collective
1914 
1915    Input Parameters:
1916 .  mat - the matrix
1917 
1918    Notes:
1919    You can only call this after a call to MatDensePlaceArray()
1920 
1921    Level: developer
1922 
1923 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1924 
1925 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1936 
1937 /*@
1938    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1939    array provided by the user. This is useful to avoid copying an array
1940    into a matrix
1941 
1942    Not Collective
1943 
1944    Input Parameters:
1945 +  mat - the matrix
1946 -  array - the array in column major order
1947 
1948    Notes:
1949    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
1950    freed by the user. It will be freed when the matrix is destroyed.
1951 
1952    Level: developer
1953 
1954 .seealso: MatDenseGetArray(), VecReplaceArray()
1955 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array lives on the host, so the host copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1969 
1970 #if defined(PETSC_HAVE_CUDA)
1971 /*@C
1972    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
1973    array provided by the user. This is useful to avoid copying an array
1974    into a matrix
1975 
1976    Not Collective
1977 
1978    Input Parameters:
1979 +  mat - the matrix
1980 -  array - the array in column major order
1981 
1982    Notes:
1983    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
1984    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
1985 
1986    Level: developer
1987 
1988 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
1989 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the placed array lives on the device, so the GPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2001 
2002 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2004 
2005    Not Collective
2006 
2007    Input Parameters:
2008 .  mat - the matrix
2009 
2010    Notes:
2011    You can only call this after a call to MatDenseCUDAPlaceArray()
2012 
2013    Level: developer
2014 
2015 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2016 
2017 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2028 
2029 /*@C
2030    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2031    array provided by the user. This is useful to avoid copying an array
2032    into a matrix
2033 
2034    Not Collective
2035 
2036    Input Parameters:
2037 +  mat - the matrix
2038 -  array - the array in column major order
2039 
2040    Notes:
2041    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2042    The memory passed in CANNOT be freed by the user. It will be freed
2043    when the matrix is destroyed. The array should respect the matrix leading dimension.
2044 
2045    Level: developer
2046 
2047 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2048 @*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement array lives on the device, so the GPU copy is now authoritative */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2060 
2061 /*@C
2062    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2063 
2064    Not Collective
2065 
2066    Input Parameters:
2067 .  A - the matrix
2068 
   Output Parameter:
2070 .  array - the GPU array in column major order
2071 
2072    Notes:
2073    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2074 
2075    Level: developer
2076 
2077 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2078 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* caller may modify the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2089 
2090 /*@C
2091    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2092 
2093    Not Collective
2094 
2095    Input Parameters:
2096 +  A - the matrix
2097 -  array - the GPU array in column major order
2098 
2099    Notes:
2100 
2101    Level: developer
2102 
2103 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2104 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the values were written on the device, so the GPU copy is now authoritative */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2116 
2117 /*@C
2118    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2119 
2120    Not Collective
2121 
2122    Input Parameters:
2123 .  A - the matrix
2124 
   Output Parameter:
2126 .  array - the GPU array in column major order
2127 
2128    Notes:
2129    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2130 
2131    Level: developer
2132 
2133 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2134 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no PetscObjectStateIncrease() here */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2144 
2145 /*@C
2146    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2147 
2148    Not Collective
2149 
2150    Input Parameters:
2151 +  A - the matrix
2152 -  array - the GPU array in column major order
2153 
2154    Notes:
2155    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2156 
2157    Level: developer
2158 
2159 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2160 @*/
2161 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2162 {
2163   PetscErrorCode ierr;
2164 
2165   PetscFunctionBegin;
2166   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /*@C
2171    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2172 
2173    Not Collective
2174 
2175    Input Parameters:
2176 .  A - the matrix
2177 
   Output Parameter:
2179 .  array - the GPU array in column major order
2180 
2181    Notes:
2182    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2183 
2184    Level: developer
2185 
2186 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2187 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on the object */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* caller may modify the values, so bump the object state */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2198 
/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   The object state is increased and the offload mask is set to PETSC_OFFLOAD_GPU, marking the device copy as the up-to-date one.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller may have modified the device values through the returned array */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2225 #endif
2226 
2227 /*@C
2228    MatCreateDense - Creates a matrix in dense format.
2229 
2230    Collective
2231 
2232    Input Parameters:
2233 +  comm - MPI communicator
2234 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2235 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2236 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2237 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2238 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2239    to control all matrix memory allocation.
2240 
2241    Output Parameter:
2242 .  A - the matrix
2243 
2244    Notes:
2245    The dense format is fully compatible with standard Fortran 77
2246    storage by columns.
2247 
2248    The data input variable is intended primarily for Fortran programmers
2249    who wish to allocate their own matrix memory space.  Most users should
2250    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2251 
2252    The user MUST specify either the local or global matrix dimensions
2253    (possibly both).
2254 
2255    Level: intermediate
2256 
2257 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2258 @*/
2259 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2260 {
2261   PetscErrorCode ierr;
2262   PetscMPIInt    size;
2263 
2264   PetscFunctionBegin;
2265   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2266   PetscValidLogicalCollectiveBool(*A,!!data,6);
2267   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2268   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2269   if (size > 1) {
2270     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2271     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2272     if (data) {  /* user provided data array, so no need to assemble */
2273       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2274       (*A)->assembled = PETSC_TRUE;
2275     }
2276   } else {
2277     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2278     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2279   }
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   If data is provided, it is taken to be a device (GPU) pointer; see MatDenseCUDAGetArray().
   The user MUST specify either the local or global matrix dimensions (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* data must be provided (or not) on every rank consistently */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2330 #endif
2331 
2332 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2333 {
2334   Mat            mat;
2335   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2336   PetscErrorCode ierr;
2337 
2338   PetscFunctionBegin;
2339   *newmat = 0;
2340   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2341   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2342   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2343   a       = (Mat_MPIDense*)mat->data;
2344 
2345   mat->factortype   = A->factortype;
2346   mat->assembled    = PETSC_TRUE;
2347   mat->preallocated = PETSC_TRUE;
2348 
2349   a->size         = oldmat->size;
2350   a->rank         = oldmat->rank;
2351   mat->insertmode = NOT_SET_VALUES;
2352   a->donotstash   = oldmat->donotstash;
2353 
2354   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2355   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2356 
2357   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2358   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2359   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2360 
2361   *newmat = mat;
2362   PetscFunctionReturn(0);
2363 }
2364 
/* Load an MPIDense matrix from a viewer; binary viewers are always supported,
   HDF5 viewers only when PETSc was configured with HDF5. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2391 
2392 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2393 {
2394   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2395   Mat            a,b;
2396   PetscBool      flg;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   a    = matA->A;
2401   b    = matB->A;
2402   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2403   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
2408 {
2409   PetscErrorCode        ierr;
2410   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2411   Mat_TransMatMultDense *atb = a->atbdense;
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2415   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2416   ierr = (*atb->destroy)(A);CHKERRQ(ierr);
2417   ierr = PetscFree(atb);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
2422 {
2423   PetscErrorCode        ierr;
2424   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2425   Mat_MatTransMultDense *abt = a->abtdense;
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2429   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2430   ierr = (abt->destroy)(A);CHKERRQ(ierr);
2431   ierr = PetscFree(abt);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
/* Numeric phase of C = A^T * B for MPI dense matrices: each rank forms the
   full-sized local product aseq^T * bseq, then the partial products are summed
   and distributed to the owning ranks with MPI_Reduce_scatter(). */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;  /* reuse data created at symbolic time */
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* rank i receives (ranges[i+1]-ranges[i]) rows of C times cN columns */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: the rows destined for each rank are packed
     contiguously, column-major within each destination block */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2472 
/* Symbolic phase of C = A^T * B for MPI dense matrices: sets up C and
   allocates the send buffer and receive counts used by the numeric phase. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T * B requires the rows of A and B to be distributed identically */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C: the send buffer must hold the full
     cM x cN local product; recvcounts has one entry per rank */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  c               = (Mat_MPIDense*)C->data;
  c->atbdense     = atb;
  /* hook our destroy routine in front of the type's destroy */
  atb->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2507 
/* Symbolic phase of C = A * B^T for MPI dense matrices: sets up C and the
   communication buffers for the selected algorithm ("allgatherv" or the ring
   "cyclic" exchange). */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;      /* 0: "allgatherv", 1: "cyclic" */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* private tag for the point-to-point messages of the cyclic algorithm */
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers, each large enough for the biggest block of B rows */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds the local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  c                               = (Mat_MPIDense*)C->data;
  c->abtdense                     = abt;
  /* hook our destroy routine in front of the type's destroy */
  abt->destroy                    = C->ops->destroy;
  C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2563 
/* Numeric phase of C = A * B^T with the "cyclic" algorithm: the blocks of B's
   rows travel around the ranks in a ring; in each step the local block of A is
   multiplied against the block currently held while the next block is in
   flight via nonblocking sends/receives. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* no leading-dimension padding: the local block of B can be sent as-is */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* compact the local block of B into buf[0], dropping the padding */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring: send to the previous rank, receive from the next one */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the columns of C owned by rank recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2644 
2645 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2646 {
2647   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2648   Mat_MatTransMultDense *abt = c->abtdense;
2649   PetscErrorCode        ierr;
2650   MPI_Comm              comm;
2651   PetscMPIInt           size;
2652   PetscScalar           *cv, *sendbuf, *recvbuf;
2653   const PetscScalar     *av,*bv;
2654   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2655   PetscScalar           _DOne=1.0,_DZero=0.0;
2656   PetscBLASInt          cm, cn, ck, alda, clda;
2657 
2658   PetscFunctionBegin;
2659   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2661   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2662   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2663   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2664   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2665   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2666   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2667   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2668   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2669   /* copy transpose of B into buf[0] */
2670   bn      = B->rmap->n;
2671   sendbuf = abt->buf[0];
2672   recvbuf = abt->buf[1];
2673   for (k = 0, j = 0; j < bn; j++) {
2674     for (i = 0; i < cK; i++, k++) {
2675       sendbuf[k] = bv[i * blda + j];
2676     }
2677   }
2678   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2679   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2680   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2681   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2682   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2683   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2684   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2685   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2686   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2691 {
2692   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
2693   Mat_MatTransMultDense *abt = c->abtdense;
2694   PetscErrorCode        ierr;
2695 
2696   PetscFunctionBegin;
2697   switch (abt->alg) {
2698   case 1:
2699     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2700     break;
2701   default:
2702     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2703     break;
2704   }
2705   PetscFunctionReturn(0);
2706 }
2707 
2708 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2709 {
2710   PetscErrorCode   ierr;
2711   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2712   Mat_MatMultDense *ab = a->abdense;
2713 
2714   PetscFunctionBegin;
2715   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2716   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2717   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2718 
2719   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2720   ierr = PetscFree(ab);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: convert both operands into the
   Elemental matrices created at symbolic time, multiply there, and convert the
   result back into the MPIDense matrix C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;  /* reuse data created at symbolic time */

  PetscFunctionBegin;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2738 
2739 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2740 {
2741   PetscErrorCode   ierr;
2742   Mat              Ae,Be,Ce;
2743   Mat_MPIDense     *c;
2744   Mat_MatMultDense *ab;
2745 
2746   PetscFunctionBegin;
2747   /* check local size of A and B */
2748   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2749     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2750   }
2751 
2752   /* create elemental matrices Ae and Be */
2753   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2754   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2755   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2756   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2757   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2758 
2759   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2760   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2761   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2762   ierr = MatSetUp(Be);CHKERRQ(ierr);
2763   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2764 
2765   /* compute symbolic Ce = Ae*Be */
2766   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2767   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2768 
2769   /* setup C */
2770   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2771   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2772   ierr = MatSetUp(C);CHKERRQ(ierr);
2773 
2774   /* create data structure for reuse Cdense */
2775   ierr = PetscNew(&ab);CHKERRQ(ierr);
2776   c                  = (Mat_MPIDense*)C->data;
2777   c->abdense         = ab;
2778 
2779   ab->Ae             = Ae;
2780   ab->Be             = Be;
2781   ab->Ce             = Ce;
2782   ab->destroy        = C->ops->destroy;
2783   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2784   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2785   C->ops->productnumeric = MatProductNumeric_AB;
2786   PetscFunctionReturn(0);
2787 }
2788 #endif
2789 /* ----------------------------------------------- */
2790 #if defined(PETSC_HAVE_ELEMENTAL)
/* Install the AB (C = A*B) product callbacks for MPIDense operands; the
   implementation is the Elemental-based path, so this routine is only
   compiled when PETSc has Elemental. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->productnumeric  = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
2799 #endif
2800 
/* Symbolic phase of the AtB (C = A^T*B) product: delegates to
   MatTransposeMatMultSymbolic_MPIDense_MPIDense() and installs the numeric
   callbacks. */
static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
  C->ops->productnumeric          = MatProductNumeric_AtB;
  C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2812 
/* Install the AtB (C = A^T*B) product callbacks for MPIDense operands. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  /* A^T*B requires the rows of A and B to be distributed identically */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
  PetscFunctionReturn(0);
}
2825 
/* Install the ABt (C = A*B^T) product callbacks for MPIDense operands and let
   the user choose between the "allgatherv" (default) and "cyclic" algorithms
   through the options database. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; the option name depends on whether the user came
     through MatMatTransposeMult() or the MatProduct API */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  C->ops->productnumeric           = MatProductNumeric_ABt;
  PetscFunctionReturn(0);
}
2861 
/* Entry point that dispatches on the requested product type for
   MPIDense*MPIDense products; AB is only available when PETSc has Elemental. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
2884