xref: /petsc/src/mat/impls/dense/mpi/mpidense.c (revision 4b6ae80f5283f4064cd7602bed63d33ad0cf5f70)
1 
2 /*
3    Basic functions for basic parallel dense matrices.
4 */
5 
6 #include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9 
10 /*@
11 
12       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13               matrix that represents the operator. For sequential matrices it returns itself.
14 
15     Input Parameter:
16 .      A - the Seq or MPI dense matrix
17 
18     Output Parameter:
19 .      B - the inner matrix
20 
21     Level: intermediate
22 
23 @*/
24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25 {
26   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
27   PetscErrorCode ierr;
28   PetscBool      flg;
29 
30   PetscFunctionBegin;
31   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
32   PetscValidPointer(B,2);
33   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
34   if (flg) *B = mat->A;
35   else *B = A;
36   PetscFunctionReturn(0);
37 }
38 
39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
40 {
41   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
44 
45   PetscFunctionBegin;
46   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
47   lrow = row - rstart;
48   ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
53 {
54   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
55   PetscErrorCode ierr;
56   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
57 
58   PetscFunctionBegin;
59   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
60   lrow = row - rstart;
61   ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
/* Return the square diagonal block of the MPI dense matrix as a sequential matrix.
   The block is built lazily on first use and cached on A; the caller receives a
   borrowed reference (do not destroy). */
PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  /* row and column layouts must agree for the diagonal block to be square */
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  /* check for a previously cached block before building a new one */
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases the local dense storage starting m*rstart entries in; NOTE(review):
       this offset assumes column-major storage with leading dimension == m — confirm
       for padded (lda != m) layouts */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* composing takes a reference, so A owns B after our MatDestroy() below */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
98 
/* Insert/add values into the MPI dense matrix. Locally owned rows go straight
   into the local dense block; off-process rows are queued in the stash (unless
   donotstash is set) and communicated during assembly. */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* convert to local row index */
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: entry (i,j) is v[i + j*m], inserted one at a time */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process row: stash it for communication at assembly time */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
132 
133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
134 {
135   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
136   PetscErrorCode ierr;
137   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
138 
139   PetscFunctionBegin;
140   for (i=0; i<m; i++) {
141     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
142     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143     if (idxm[i] >= rstart && idxm[i] < rend) {
144       row = idxm[i] - rstart;
145       for (j=0; j<n; j++) {
146         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
147         if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
148         ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
149       }
150     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
156 {
157   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
158   PetscErrorCode ierr;
159 
160   PetscFunctionBegin;
161   ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
162   PetscFunctionReturn(0);
163 }
164 
165 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
166 {
167   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
168   PetscErrorCode ierr;
169 
170   PetscFunctionBegin;
171   ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174 
175 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
176 {
177   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
178   PetscErrorCode ierr;
179 
180   PetscFunctionBegin;
181   ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
182   PetscFunctionReturn(0);
183 }
184 
185 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
186 {
187   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
196 {
197   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
198   PetscErrorCode ierr;
199 
200   PetscFunctionBegin;
201   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
202   ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
203   PetscFunctionReturn(0);
204 }
205 
206 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
207 {
208   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
209   PetscErrorCode ierr;
210 
211   PetscFunctionBegin;
212   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
213   ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
214   PetscFunctionReturn(0);
215 }
216 
217 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
218 {
219   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
220   PetscErrorCode ierr;
221 
222   PetscFunctionBegin;
223   if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
224   ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
/* Extract the submatrix A(isrow,iscol) into *B. The row index set selects locally
   owned rows only (no redistribution); the column index set is gathered so every
   process sees all selected columns. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the full list of selected columns since whole rows live locally */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to comfirm that it is OK.  ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  /* source is column-major with stride lda; destination is filled contiguously,
     one selected column at a time (NOTE(review): assumes the new matrix's lda
     equals its local row count — confirm) */
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}
297 
298 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
299 {
300   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
301   PetscErrorCode ierr;
302 
303   PetscFunctionBegin;
304   ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
309 {
310   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
311   PetscErrorCode ierr;
312 
313   PetscFunctionBegin;
314   ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
319 {
320   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
321   PetscErrorCode ierr;
322 
323   PetscFunctionBegin;
324   ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
329 {
330   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
331   PetscErrorCode ierr;
332   PetscInt       nstash,reallocs;
333 
334   PetscFunctionBegin;
335   if (mdn->donotstash || mat->nooffprocentries) return(0);
336 
337   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
338   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
339   ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
/* Finish assembly: drain the stash messages arriving from other processes,
   insert their values, assemble the local block, and on the first final
   assembly build the scatter context used by MatMult. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* first final assembly: set up the communication needed for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
381 
382 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
383 {
384   PetscErrorCode ierr;
385   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
386 
387   PetscFunctionBegin;
388   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
/* Zero the given global rows, optionally placing 'diag' on the diagonal and
   fixing the right-hand side b so that the zeroed rows satisfy b = diag*x. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block; the diagonal is set separately below */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* NOTE(review): this inserts 'diag' on the ENTIRE diagonal, not only the
       zeroed rows — appears intentional for dense but verify against callers */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
424 
425 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
426 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
427 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
428 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
429 
430 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
431 {
432   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
433   PetscErrorCode    ierr;
434   const PetscScalar *ax;
435   PetscScalar       *ay;
436 
437   PetscFunctionBegin;
438   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
439   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
440   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
441   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
442   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
443   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
444   ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
445   PetscFunctionReturn(0);
446 }
447 
448 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
449 {
450   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
451   PetscErrorCode    ierr;
452   const PetscScalar *ax;
453   PetscScalar       *ay;
454 
455   PetscFunctionBegin;
456   ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
457   ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
458   ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
459   ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
460   ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
461   ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
462   ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
467 {
468   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
469   PetscErrorCode    ierr;
470   const PetscScalar *ax;
471   PetscScalar       *ay;
472 
473   PetscFunctionBegin;
474   ierr = VecSet(yy,0.0);CHKERRQ(ierr);
475   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
476   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
477   ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr);
478   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
479   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
480   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
481   ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
486 {
487   Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
488   PetscErrorCode    ierr;
489   const PetscScalar *ax;
490   PetscScalar       *ay;
491 
492   PetscFunctionBegin;
493   ierr = VecCopy(yy,zz);CHKERRQ(ierr);
494   ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
495   ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
496   ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr);
497   ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
498   ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
499   ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
500   ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
504 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
505 {
506   Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
507   PetscErrorCode    ierr;
508   PetscInt          lda,len,i,n,m = A->rmap->n,radd;
509   PetscScalar       *x,zero = 0.0;
510   const PetscScalar *av;
511 
512   PetscFunctionBegin;
513   ierr = VecSet(v,zero);CHKERRQ(ierr);
514   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
515   ierr = VecGetSize(v,&n);CHKERRQ(ierr);
516   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
517   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
518   radd = A->rmap->rstart*m;
519   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
520   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
521   for (i=0; i<len; i++) {
522     x[i] = av[radd + i*lda + i];
523   }
524   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
525   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
/* Destroy the MPI dense matrix: free the stash, the local block, the
   communication objects, the private data, and clear all composed methods. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* free the local dense block and the mat-vec communication machinery */
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);

  /* remove every method composed on this object by the type constructor */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
579 
580 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
581 
582 #include <petscdraw.h>
/* View the matrix through an ASCII, draw, or socket viewer. INFO formats are
   printed per-process; otherwise the whole matrix is gathered onto rank 0 and
   viewed there as a sequential dense matrix. */
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = mdn->rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* print per-process storage information, synchronized across ranks */
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* nothing to do for a null draw context */
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    /* rank 0 owns every row of the gathered copy; all other ranks own none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* only rank 0 actually views its (complete) local block */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
664 
665 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
666 {
667   PetscErrorCode ierr;
668   PetscBool      iascii,isbinary,isdraw,issocket;
669 
670   PetscFunctionBegin;
671   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
672   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
673   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
674   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
675 
676   if (iascii || issocket || isdraw) {
677     ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
678   } else if (isbinary) {
679     ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
680   }
681   PetscFunctionReturn(0);
682 }
683 
684 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
685 {
686   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
687   Mat            mdn  = mat->A;
688   PetscErrorCode ierr;
689   PetscLogDouble isend[5],irecv[5];
690 
691   PetscFunctionBegin;
692   info->block_size = 1.0;
693 
694   ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
695 
696   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
697   isend[3] = info->memory;  isend[4] = info->mallocs;
698   if (flag == MAT_LOCAL) {
699     info->nz_used      = isend[0];
700     info->nz_allocated = isend[1];
701     info->nz_unneeded  = isend[2];
702     info->memory       = isend[3];
703     info->mallocs      = isend[4];
704   } else if (flag == MAT_GLOBAL_MAX) {
705     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
706 
707     info->nz_used      = irecv[0];
708     info->nz_allocated = irecv[1];
709     info->nz_unneeded  = irecv[2];
710     info->memory       = irecv[3];
711     info->mallocs      = irecv[4];
712   } else if (flag == MAT_GLOBAL_SUM) {
713     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
714 
715     info->nz_used      = irecv[0];
716     info->nz_allocated = irecv[1];
717     info->nz_unneeded  = irecv[2];
718     info->memory       = irecv[3];
719     info->mallocs      = irecv[4];
720   }
721   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
722   info->fill_ratio_needed = 0;
723   info->factor_mallocs    = 0;
724   PetscFunctionReturn(0);
725 }
726 
/* Handle options for the MPI dense type: forward structural options to the
   local block, record stash/orientation options, and ignore options that have
   no meaning for a dense matrix. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the sequential local block */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded here (affects MatSetValues indexing) and forwarded */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* disables stashing of off-process values in MatSetValues */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
767 
768 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
769 {
770   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
771   const PetscScalar *l;
772   PetscScalar       x,*v,*vv,*r;
773   PetscErrorCode    ierr;
774   PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
775 
776   PetscFunctionBegin;
777   ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
778   ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
779   ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
780   if (ll) {
781     ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
782     if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
783     ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
784     for (i=0; i<m; i++) {
785       x = l[i];
786       v = vv + i;
787       for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
788     }
789     ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
790     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
791   }
792   if (rr) {
793     const PetscScalar *ar;
794 
795     ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
796     if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
797     ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
798     ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
799     ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
800     ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
801     ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
802     for (i=0; i<n; i++) {
803       x = r[i];
804       v = vv + i*lda;
805       for (j=0; j<m; j++) (*v++) *= x;
806     }
807     ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
808     ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
809   }
810   ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
811   PetscFunctionReturn(0);
812 }
813 
814 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
815 {
816   Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
817   PetscErrorCode    ierr;
818   PetscInt          i,j;
819   PetscReal         sum = 0.0;
820   const PetscScalar *av,*v;
821 
822   PetscFunctionBegin;
823   ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
824   v    = av;
825   if (mdn->size == 1) {
826     ierr =  MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
827   } else {
828     if (type == NORM_FROBENIUS) {
829       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
830         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
831       }
832       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
833       *nrm = PetscSqrtReal(*nrm);
834       ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
835     } else if (type == NORM_1) {
836       PetscReal *tmp,*tmp2;
837       ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
838       *nrm = 0.0;
839       v    = av;
840       for (j=0; j<mdn->A->cmap->n; j++) {
841         for (i=0; i<mdn->A->rmap->n; i++) {
842           tmp[j] += PetscAbsScalar(*v);  v++;
843         }
844       }
845       ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
846       for (j=0; j<A->cmap->N; j++) {
847         if (tmp2[j] > *nrm) *nrm = tmp2[j];
848       }
849       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
850       ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
851     } else if (type == NORM_INFINITY) { /* max row norm */
852       PetscReal ntemp;
853       ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
854       ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
855     } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
856   }
857   ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
862 {
863   Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
864   Mat            B;
865   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
866   PetscErrorCode ierr;
867   PetscInt       j,i,lda;
868   PetscScalar    *v;
869 
870   PetscFunctionBegin;
871   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
872     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
873     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
874     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
875     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
876   } else B = *matout;
877 
878   m    = a->A->rmap->n; n = a->A->cmap->n;
879   ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
880   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
881   ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
882   for (i=0; i<m; i++) rwork[i] = rstart + i;
883   for (j=0; j<n; j++) {
884     ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
885     v   += lda;
886   }
887   ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
888   ierr = PetscFree(rwork);CHKERRQ(ierr);
889   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
890   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
891   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
892     *matout = B;
893   } else {
894     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
895   }
896   PetscFunctionReturn(0);
897 }
898 
899 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
900 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
901 
902 PetscErrorCode MatSetUp_MPIDense(Mat A)
903 {
904   PetscErrorCode ierr;
905 
906   PetscFunctionBegin;
907   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
908   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
909   if (!A->preallocated) {
910     ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
916 {
917   PetscErrorCode ierr;
918   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
919 
920   PetscFunctionBegin;
921   ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 PetscErrorCode MatConjugate_MPIDense(Mat mat)
926 {
927   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
928   PetscErrorCode ierr;
929 
930   PetscFunctionBegin;
931   ierr = MatConjugate(a->A);CHKERRQ(ierr);
932   PetscFunctionReturn(0);
933 }
934 
935 PetscErrorCode MatRealPart_MPIDense(Mat A)
936 {
937   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = MatRealPart(a->A);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
945 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
946 {
947   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
948   PetscErrorCode ierr;
949 
950   PetscFunctionBegin;
951   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
956 {
957   PetscErrorCode ierr;
958   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
959 
960   PetscFunctionBegin;
961   if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
962   if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
963   ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
968 
969 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
970 {
971   PetscErrorCode ierr;
972   PetscInt       i,n;
973   Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
974   PetscReal      *work;
975 
976   PetscFunctionBegin;
977   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
978   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
979   ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
980   if (type == NORM_2) {
981     for (i=0; i<n; i++) work[i] *= work[i];
982   }
983   if (type == NORM_INFINITY) {
984     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
985   } else {
986     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
987   }
988   ierr = PetscFree(work);CHKERRQ(ierr);
989   if (type == NORM_2) {
990     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
991   }
992   PetscFunctionReturn(0);
993 }
994 
995 #if defined(PETSC_HAVE_CUDA)
/*
   MatDenseGetColumnVec_MPIDenseCUDA - Exposes column col of the matrix as an
   MPI CUDA vector sharing the matrix storage (read/write access).

   The column vector is created lazily (with no backing array) on first use and
   reused afterwards; the requested column is attached with VecCUDAPlaceArray.
   Only one column may be out at a time; it must be returned with
   MatDenseRestoreColumnVec before another get.
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily create the reusable column vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* stored off-by-one so 0 means "no column in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* column col starts at offset col*lda in the (column-major) local storage */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1014 
/*
   MatDenseRestoreColumnVec_MPIDenseCUDA - Returns the column vector obtained
   with MatDenseGetColumnVec_MPIDenseCUDA, detaching it from the matrix storage
   and releasing the array obtained in the matching get.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;  /* mark the column vector as available again */
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1029 
/*
   MatDenseGetColumnVecRead_MPIDenseCUDA - Read-only variant of
   MatDenseGetColumnVec_MPIDenseCUDA: uses the read-only device array and
   additionally read-locks the returned vector.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily create the reusable column vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* stored off-by-one so 0 means "no column in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1049 
/*
   MatDenseRestoreColumnVecRead_MPIDenseCUDA - Returns a column vector obtained
   with MatDenseGetColumnVecRead_MPIDenseCUDA, popping the read lock and
   detaching the vector from the matrix storage.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;  /* mark the column vector as available again */
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1065 
/*
   MatDenseGetColumnVecWrite_MPIDenseCUDA - Write-only variant of
   MatDenseGetColumnVec_MPIDenseCUDA: uses the write-only device array
   (previous contents of the column need not be valid).
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* lazily create the reusable column vector; its array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* stored off-by-one so 0 means "no column in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1084 
/*
   MatDenseRestoreColumnVecWrite_MPIDenseCUDA - Returns a column vector obtained
   with MatDenseGetColumnVecWrite_MPIDenseCUDA, detaching it from the matrix
   storage and releasing the write-only array.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;  /* mark the column vector as available again */
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1099 
1100 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1101 {
1102   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1103   PetscErrorCode ierr;
1104 
1105   PetscFunctionBegin;
1106   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1107   ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1112 {
1113   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1118   ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1123 {
1124   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
1129   ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1134 {
1135   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1144 {
1145   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1146   PetscErrorCode ierr;
1147 
1148   PetscFunctionBegin;
1149   ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1154 {
1155   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1164 {
1165   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1174 {
1175   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1176   PetscErrorCode ierr;
1177 
1178   PetscFunctionBegin;
1179   ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1184 {
1185   Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
1186   PetscErrorCode ierr;
1187 
1188   PetscFunctionBegin;
1189   ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1194 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1195 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1196 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1197 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1198 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1199 
/*
   MatBindToCPU_MPIDenseCUDA - Pins (bind=PETSC_TRUE) or unpins the matrix to
   the CPU.

   The inner sequential matrix is bound as well, and the composed column-vector
   accessors are swapped between the CUDA and host implementations so later
   MatDenseGetColumnVec* calls use the appropriate vector type.  Not allowed
   while a column vector is checked out.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop a previously created host column vector so a CUDA one is created on demand */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1234 
/*
   MatMPIDenseCUDASetPreallocation - Preallocates the storage of a
   MATMPIDENSECUDA matrix.

   d_data - optional array to use as storage for the local block; assumed to be
            device memory when non-NULL (TODO(review): confirm ownership
            semantics against MatSeqDenseCUDASetPreallocation).

   Matrices that are not of type MATMPIDENSECUDA are silently ignored.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    /* local block holds the local rows times ALL global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1257 #endif
1258 
1259 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1260 {
1261   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1262   PetscErrorCode ierr;
1263 
1264   PetscFunctionBegin;
1265   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 PETSC_INTERN PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat,Mat);
1270 
/*
   MatMissingDiagonal_MPIDense - A dense matrix stores every entry explicitly,
   so no diagonal entry is ever missing; d is deliberately left untouched since
   there is no missing location to report.
*/
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1277 
1278 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1279 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1280 
1281 /* -------------------------------------------------------------------*/
/* Operation table for MATMPIDENSE.  A 0 entry means the operation is not
   implemented for this type; the numbered comments index the slots of
   struct _MatOps so individual entries can be located quickly. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        MatMatMultNumeric_MPIDense,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        0,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        0,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};
1431 
1432 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1433 {
1434   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1435   PetscBool      iscuda;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1440   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1441   if (!a->A) {
1442     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1443     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1444     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1445   }
1446   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1447   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1448   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1449   mat->preallocated = PETSC_TRUE;
1450   PetscFunctionReturn(0);
1451 }
1452 
#if defined(PETSC_HAVE_ELEMENTAL)
/*
   Converts a MATMPIDENSE matrix to MATELEMENTAL by scattering each locally
   owned row (all N columns) into the Elemental distribution.

   A is only read here, so the read-only array accessors are used; the
   read/write MatDenseGetArray() would bump A's object state and invalidate
   any device copy even though nothing is modified.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_elemental;
  PetscErrorCode    ierr;
  const PetscScalar *v;
  PetscInt          m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* Owned rows are rstart..rstart+m-1; every rank contributes all N columns */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArrayRead(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif
1493 
1494 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1495 {
1496   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1497   PetscErrorCode ierr;
1498 
1499   PetscFunctionBegin;
1500   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1505 {
1506   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1515 {
1516   PetscErrorCode ierr;
1517   Mat_MPIDense   *mat;
1518   PetscInt       m,nloc,N;
1519 
1520   PetscFunctionBegin;
1521   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1522   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1523   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1524     PetscInt sum;
1525 
1526     if (n == PETSC_DECIDE) {
1527       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1528     }
1529     /* Check sum(n) = N */
1530     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1531     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1532 
1533     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1534   }
1535 
1536   /* numeric phase */
1537   mat = (Mat_MPIDense*)(*outmat)->data;
1538   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1539   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1540   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #if defined(PETSC_HAVE_CUDA)
/* Converts a MATMPIDENSECUDA matrix back to host-only MATMPIDENSE, stripping
   the CUDA-specific method implementations composed on the object. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* (MAT_INPLACE_MATRIX converts M itself: *newmat aliases M) */

  /* Move data/operations to the CPU, switch the default vector type and the
     type name, and remove all CUDA-specific composed methods */
  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* Convert the inner sequential matrix too and rebuild the scatter used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU; /* data now lives on the host only */
  PetscFunctionReturn(0);
}
1584 
/* Converts a MATMPIDENSE matrix to MATMPIDENSECUDA, composing the
   CUDA-specific method implementations on the object. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* (MAT_INPLACE_MATRIX converts M itself: *newmat aliases M) */

  /* Switch the default vector type, the type name, and install the
     CUDA-specific composed methods */
  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",            MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                        MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                    MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                   MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                    MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",               MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                      MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                      MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                    MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m    = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* Convert the inner sequential matrix too and rebuild the scatter used by MatMult */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1627 #endif
1628 
/* Exposes column "col" of the matrix as a parallel Vec (read/write access).
   Only one column vector may be outstanding at a time; the caller must pair
   this with MatDenseRestoreColumnVec_MPIDense(). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* Lazily create the cached wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* Column-major storage: column "col" starts at offset col*lda */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1647 
/* Releases the column vector obtained with MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  /* Return the array to the inner matrix, then detach it from the cached vector */
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
1662 
/* Exposes column "col" of the matrix as a parallel Vec for read-only access;
   the vector is read-locked so callers cannot modify it. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* Lazily create the cached wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  /* Column-major storage: column "col" starts at offset col*lda */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1682 
1683 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1684 {
1685   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1690   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1691   a->vecinuse = 0;
1692   ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1693   ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1694   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1695   *v   = NULL;
1696   PetscFunctionReturn(0);
1697 }
1698 
/* Exposes column "col" of the matrix as a parallel Vec for write-only access
   (existing values need not be valid). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec first");
  if (!a->cvec) {
    /* Lazily create the cached wrapper vector; its array is placed below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* Column-major storage: column "col" starts at offset col*lda */
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}
1717 
1718 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1719 {
1720   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1721   PetscErrorCode ierr;
1722 
1723   PetscFunctionBegin;
1724   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec first");
1725   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1726   a->vecinuse = 0;
1727   ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1728   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1729   *v   = NULL;
1730   PetscFunctionReturn(0);
1731 }
1732 
/* Type constructor for MATMPIDENSE: allocates the type-specific data,
   installs the operation table, and composes the dense-specific methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr            = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr            = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* Compose the dense-specific methods dispatched by the MatDense* API */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",MatMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",MatMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_nest_mpidense_C",MatMatMultSymbolic_Nest_Dense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_nest_mpidense_C",MatMatMultNumeric_Nest_Dense);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultSymbolic_mpiaij_mpidense_C",MatTransposeMatMultSymbolic_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatTransposeMatMultNumeric_mpiaij_mpidense_C",MatTransposeMatMultNumeric_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1794 
1795 /*MC
1796    MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1797 
1798    Options Database Keys:
1799 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1800 
1801   Level: beginner
1802 
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1804 
1805 M*/
1806 #if defined(PETSC_HAVE_CUDA)
/* Type constructor for MATMPIDENSECUDA: build a plain MPIDENSE matrix,
   then convert it to the CUDA type in place. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1816 #endif
1817 
1818 /*MC
1819    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1820 
1821    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1822    and MATMPIDENSE otherwise.
1823 
1824    Options Database Keys:
1825 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1826 
1827   Level: beginner
1828 
1829 
1830 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1831 M*/
1832 
1833 /*MC
1834    MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1835 
1836    This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1837    and MATMPIDENSECUDA otherwise.
1838 
1839    Options Database Keys:
1840 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1841 
1842   Level: beginner
1843 
1844 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1845 M*/
1846 
1847 /*@C
1848    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1849 
1850    Not collective
1851 
1852    Input Parameters:
+  B - the matrix
1854 -  data - optional location of matrix data.  Set data=NULL for PETSc
1855    to control all matrix memory allocation.
1856 
1857    Notes:
1858    The dense format is fully compatible with standard Fortran 77
1859    storage by columns.
1860 
1861    The data input variable is intended primarily for Fortran programmers
1862    who wish to allocate their own matrix memory space.  Most users should
1863    set data=NULL.
1864 
1865    Level: intermediate
1866 
1867 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1868 @*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* Dispatch to the type-specific implementation; a no-op for non-MPIDense types */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1878 
1879 /*@
1880    MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
1881    array provided by the user. This is useful to avoid copying an array
1882    into a matrix
1883 
1884    Not Collective
1885 
1886    Input Parameters:
1887 +  mat - the matrix
1888 -  array - the array in column major order
1889 
1890    Notes:
1891    You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
1892    freed when the matrix is destroyed.
1893 
1894    Level: developer
1895 
1896 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1897 
1898 @*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* The placed array lives on the host, so mark the host copy as current */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1912 
1913 /*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()
1915 
1916    Not Collective
1917 
1918    Input Parameters:
1919 .  mat - the matrix
1920 
1921    Notes:
1922    You can only call this after a call to MatDensePlaceArray()
1923 
1924    Level: developer
1925 
1926 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
1927 
1928 @*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* Dispatch to the type-specific implementation; errors if unsupported */
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1939 
1940 /*@
1941    MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
1942    array provided by the user. This is useful to avoid copying an array
1943    into a matrix
1944 
1945    Not Collective
1946 
1947    Input Parameters:
1948 +  mat - the matrix
1949 -  array - the array in column major order
1950 
1951    Notes:
1952    The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
1953    freed by the user. It will be freed when the matrix is destroyed.
1954 
1955    Level: developer
1956 
1957 .seealso: MatDenseGetArray(), VecReplaceArray()
1958 @*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* The replacement array lives on the host, so mark the host copy as current */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
1972 
1973 #if defined(PETSC_HAVE_CUDA)
1974 /*@C
1975    MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
1976    array provided by the user. This is useful to avoid copying an array
1977    into a matrix
1978 
1979    Not Collective
1980 
1981    Input Parameters:
1982 +  mat - the matrix
1983 -  array - the array in column major order
1984 
1985    Notes:
1986    You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
1987    freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
1988 
1989    Level: developer
1990 
1991 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
1992 @*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* The placed array lives on the device, so mark the GPU copy as current */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2004 
2005 /*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()
2007 
2008    Not Collective
2009 
2010    Input Parameters:
2011 .  mat - the matrix
2012 
2013    Notes:
2014    You can only call this after a call to MatDenseCUDAPlaceArray()
2015 
2016    Level: developer
2017 
2018 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2019 
2020 @*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  /* Dispatch to the type-specific implementation; errors if unsupported */
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2031 
2032 /*@C
2033    MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2034    array provided by the user. This is useful to avoid copying an array
2035    into a matrix
2036 
2037    Not Collective
2038 
2039    Input Parameters:
2040 +  mat - the matrix
2041 -  array - the array in column major order
2042 
2043    Notes:
2044    This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2045    The memory passed in CANNOT be freed by the user. It will be freed
2046    when the matrix is destroyed. The array should respect the matrix leading dimension.
2047 
2048    Level: developer
2049 
2050 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2051 @*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* The replacement array lives on the device, so mark the GPU copy as current */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2063 
2064 /*@C
2065    MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2066 
2067    Not Collective
2068 
2069    Input Parameters:
2070 .  A - the matrix
2071 
2072    Output Parameters
2073 .  array - the GPU array in column major order
2074 
2075    Notes:
2076    The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2077 
2078    Level: developer
2079 
2080 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2081 @*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* Write access: bump the object state since the caller may modify the data */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2092 
2093 /*@C
2094    MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2095 
2096    Not Collective
2097 
2098    Input Parameters:
2099 +  A - the matrix
2100 -  array - the GPU array in column major order
2101 
2102    Notes:
2103 
2104    Level: developer
2105 
2106 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2107 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* The caller wrote on the device, so the GPU copy is now the current one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2119 
2120 /*@C
2121    MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2122 
2123    Not Collective
2124 
2125    Input Parameters:
2126 .  A - the matrix
2127 
2128    Output Parameters
2129 .  array - the GPU array in column major order
2130 
2131    Notes:
2132    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2133 
2134    Level: developer
2135 
2136 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2137 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* Read-only access: the object state is intentionally NOT increased */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2147 
2148 /*@C
2149    MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2150 
2151    Not Collective
2152 
2153    Input Parameters:
2154 +  A - the matrix
2155 -  array - the GPU array in column major order
2156 
2157    Notes:
2158    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2159 
2160    Level: developer
2161 
2162 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2163 @*/
2164 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2165 {
2166   PetscErrorCode ierr;
2167 
2168   PetscFunctionBegin;
2169   ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 /*@C
2174    MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2175 
2176    Not Collective
2177 
2178    Input Parameters:
2179 .  A - the matrix
2180 
2181    Output Parameters
2182 .  array - the GPU array in column major order
2183 
2184    Notes:
2185    Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2186 
2187    Level: developer
2188 
2189 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2190 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the implementation registered by the (SEQ|MPI)DENSECUDA type */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* read-write access: bump the object state so dependent objects notice a change */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2201 
2202 /*@C
2203    MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2204 
2205    Not Collective
2206 
2207    Input Parameters:
2208 +  A - the matrix
2209 -  array - the GPU array in column major order
2210 
2211    Notes:
2212 
2213    Level: developer
2214 
2215 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2216 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  /* the caller may have written through the device pointer: bump the object
     state and record that the up-to-date copy of the data lives on the GPU */
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2228 #endif
2229 
2230 /*@C
2231    MatCreateDense - Creates a matrix in dense format.
2232 
2233    Collective
2234 
2235    Input Parameters:
2236 +  comm - MPI communicator
2237 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2238 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2239 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2240 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2241 -  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2242    to control all matrix memory allocation.
2243 
2244    Output Parameter:
2245 .  A - the matrix
2246 
2247    Notes:
2248    The dense format is fully compatible with standard Fortran 77
2249    storage by columns.
2250 
2251    The data input variable is intended primarily for Fortran programmers
2252    who wish to allocate their own matrix memory space.  Most users should
2253    set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2254 
2255    The user MUST specify either the local or global matrix dimensions
2256    (possibly both).
2257 
2258    Level: intermediate
2259 
2260 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2261 @*/
2262 PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2263 {
2264   PetscErrorCode ierr;
2265   PetscMPIInt    size;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2269   PetscValidLogicalCollectiveBool(*A,!!data,6);
2270   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2271   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2272   if (size > 1) {
2273     ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2274     ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2275     if (data) {  /* user provided data array, so no need to assemble */
2276       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2277       (*A)->assembled = PETSC_TRUE;
2278     }
2279   } else {
2280     ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2281     ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2282   }
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #if defined(PETSC_HAVE_CUDA)
2287 /*@C
2288    MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2289 
2290    Collective
2291 
2292    Input Parameters:
2293 +  comm - MPI communicator
2294 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2295 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2296 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2297 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2298 -  data - optional location of GPU matrix data.  Set data=NULL for PETSc
2299    to control matrix memory allocation.
2300 
2301    Output Parameter:
2302 .  A - the matrix
2303 
2304    Notes:
2305 
2306    Level: intermediate
2307 
2308 .seealso: MatCreate(), MatCreateDense()
2309 @*/
2310 PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2311 {
2312   PetscErrorCode ierr;
2313   PetscMPIInt    size;
2314 
2315   PetscFunctionBegin;
2316   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2317   PetscValidLogicalCollectiveBool(*A,!!data,6);
2318   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2319   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2320   if (size > 1) {
2321     ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
2322     ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2323     if (data) {  /* user provided data array, so no need to assemble */
2324       ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2325       (*A)->assembled = PETSC_TRUE;
2326     }
2327   } else {
2328     ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
2329     ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
2330   }
2331   PetscFunctionReturn(0);
2332 }
2333 #endif
2334 
2335 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2336 {
2337   Mat            mat;
2338   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
2339   PetscErrorCode ierr;
2340 
2341   PetscFunctionBegin;
2342   *newmat = 0;
2343   ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
2344   ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2345   ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
2346   a       = (Mat_MPIDense*)mat->data;
2347 
2348   mat->factortype   = A->factortype;
2349   mat->assembled    = PETSC_TRUE;
2350   mat->preallocated = PETSC_TRUE;
2351 
2352   a->size         = oldmat->size;
2353   a->rank         = oldmat->rank;
2354   mat->insertmode = NOT_SET_VALUES;
2355   a->donotstash   = oldmat->donotstash;
2356 
2357   ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
2358   ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);
2359 
2360   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2361   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2362   ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
2363 
2364   *newmat = mat;
2365   PetscFunctionReturn(0);
2366 }
2367 
/* Load an MPIDENSE matrix from a viewer; binary and (when configured) HDF5
   viewers are supported, anything else errors out. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2394 
2395 PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool  *flag)
2396 {
2397   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2398   Mat            a,b;
2399   PetscBool      flg;
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   a    = matA->A;
2404   b    = matB->A;
2405   ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2406   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(Mat A)
2411 {
2412   PetscErrorCode        ierr;
2413   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2414   Mat_TransMatMultDense *atb = a->atbdense;
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
2418   ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
2419   ierr = (*atb->destroy)(A);CHKERRQ(ierr);
2420   ierr = PetscFree(atb);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(Mat A)
2425 {
2426   PetscErrorCode        ierr;
2427   Mat_MPIDense          *a = (Mat_MPIDense*)A->data;
2428   Mat_MatTransMultDense *abt = a->abtdense;
2429 
2430   PetscFunctionBegin;
2431   ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
2432   ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
2433   ierr = (abt->destroy)(A);CHKERRQ(ierr);
2434   ierr = PetscFree(abt);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
/* Numeric phase of C = A^T*B for MPIDENSE matrices.
   Each rank computes the full-sized local product aseq^T*bseq, packs it by
   destination rank, and a reduce-scatter sums the contributions into the
   owning rank's rows of C. */
PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb = c->atbdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts=atb->recvcounts;
  PetscScalar           *carray,*sendbuf=atb->sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq (reuse the intermediate matrix on later calls) */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  /* each rank receives (its row count)*cN entries from the reduce-scatter */
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf: for each destination rank, its rows in
     column-major order (atbarray itself is column-major with leading dim cM) */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all per-rank partial products into the local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2475 
/* Symbolic phase of C = A^T*B for MPIDENSE matrices: size and assemble C,
   allocate the send buffer / recvcounts used by the numeric phase, and hook
   the destroy routine so the reuse context is freed with C. */
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_MPIDense          *c;
  Mat_TransMatMultDense *atb;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A^T*B requires A and B to have identical row distributions */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C (empty: values are filled in by the numeric phase) */
  ierr = MatSetSizes(C,cm,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C: sendbuf holds the full cM x cN product */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);

  /* save the original destroy routine and interpose our own */
  c               = (Mat_MPIDense*)C->data;
  c->atbdense     = atb;
  atb->destroy    = C->ops->destroy;
  C->ops->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2510 
/* Symbolic phase of C = A*B^T for MPIDENSE matrices.
   Chooses between two numeric algorithms ("allgatherv" gathers all of B^T on
   every rank; "cyclic" pipelines B's row blocks around a ring) and allocates
   the buffers each one needs. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  Mat_MPIDense          *c;
  PetscMPIInt           tag;
  PetscInt              alg;          /* 0 = allgatherv, 1 = cyclic */
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  /* check local size of A and B: A*B^T needs matching local column counts */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C (empty: values are filled in by the numeric phase) */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* private tag so the cyclic algorithm's messages cannot collide with others */
  ierr = PetscObjectGetNewTag((PetscObject)C, &tag);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the gathered global B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  /* save the original destroy routine and interpose our own */
  c                               = (Mat_MPIDense*)C->data;
  c->abtdense                     = abt;
  abt->destroy                    = C->ops->destroy;
  C->ops->destroy                 = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2566 
/* Numeric phase of C = A*B^T, "cyclic" algorithm: B's local row block
   (transposed into column-major order) is pipelined around a ring of ranks;
   at each step every rank multiplies its local A block against the block it
   currently holds, filling the corresponding columns of C, while the next
   block is in flight. */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt = c->abtdense;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
  /* leading dimensions of the local dense blocks, as BLAS integers */
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    /* local B block is contiguous: send it in place, no packing needed */
    sendbuf = (PetscScalar*)bv;
  } else {
    /* pack the local B block (stripping the leading-dimension padding) into buf[0] */
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    /* ring neighbors: send to the previous rank, receive from the next */
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;  /* the block currently held originated on this rank */
  for (i = 0; i < size; i++) {
    /* previous receives and sends have completed, so bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T: fills the columns of C owned by recvisfrom's row block */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2647 
2648 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2649 {
2650   Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2651   Mat_MatTransMultDense *abt = c->abtdense;
2652   PetscErrorCode        ierr;
2653   MPI_Comm              comm;
2654   PetscMPIInt           size;
2655   PetscScalar           *cv, *sendbuf, *recvbuf;
2656   const PetscScalar     *av,*bv;
2657   PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
2658   PetscScalar           _DOne=1.0,_DZero=0.0;
2659   PetscBLASInt          cm, cn, ck, alda, clda;
2660 
2661   PetscFunctionBegin;
2662   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2663   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2664   ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2665   ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2666   ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr);
2667   ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2668   ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2669   ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2670   ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2671   ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2672   /* copy transpose of B into buf[0] */
2673   bn      = B->rmap->n;
2674   sendbuf = abt->buf[0];
2675   recvbuf = abt->buf[1];
2676   for (k = 0, j = 0; j < bn; j++) {
2677     for (i = 0; i < cK; i++, k++) {
2678       sendbuf[k] = bv[i * blda + j];
2679     }
2680   }
2681   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2682   ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2683   ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2684   ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2685   ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2686   if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2687   ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2688   ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2689   ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr);
2690   PetscFunctionReturn(0);
2691 }
2692 
2693 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2694 {
2695   Mat_MPIDense          *c=(Mat_MPIDense*)C->data;
2696   Mat_MatTransMultDense *abt = c->abtdense;
2697   PetscErrorCode        ierr;
2698 
2699   PetscFunctionBegin;
2700   switch (abt->alg) {
2701   case 1:
2702     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2703     break;
2704   default:
2705     ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2706     break;
2707   }
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(Mat A)
2712 {
2713   PetscErrorCode   ierr;
2714   Mat_MPIDense     *a = (Mat_MPIDense*)A->data;
2715   Mat_MatMultDense *ab = a->abdense;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
2719   ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
2720   ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
2721 
2722   ierr = (ab->destroy)(A);CHKERRQ(ierr);
2723   ierr = PetscFree(ab);CHKERRQ(ierr);
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A*B via Elemental: refresh the Elemental copies of A
   and B, multiply there, and convert the result back into the MPIDENSE C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MPIDense     *c=(Mat_MPIDense*)C->data;
  Mat_MatMultDense *ab=c->abdense;

  PetscFunctionBegin;
  /* reuse the Elemental mirrors created during the symbolic phase */
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2741 
2742 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2743 {
2744   PetscErrorCode   ierr;
2745   Mat              Ae,Be,Ce;
2746   Mat_MPIDense     *c;
2747   Mat_MatMultDense *ab;
2748 
2749   PetscFunctionBegin;
2750   /* check local size of A and B */
2751   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2752     SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2753   }
2754 
2755   /* create elemental matrices Ae and Be */
2756   ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2757   ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2758   ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2759   ierr = MatSetUp(Ae);CHKERRQ(ierr);
2760   ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2761 
2762   ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2763   ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2764   ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2765   ierr = MatSetUp(Be);CHKERRQ(ierr);
2766   ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2767 
2768   /* compute symbolic Ce = Ae*Be */
2769   ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2770   ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2771 
2772   /* setup C */
2773   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2774   ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2775   ierr = MatSetUp(C);CHKERRQ(ierr);
2776 
2777   /* create data structure for reuse Cdense */
2778   ierr = PetscNew(&ab);CHKERRQ(ierr);
2779   c                  = (Mat_MPIDense*)C->data;
2780   c->abdense         = ab;
2781 
2782   ab->Ae             = Ae;
2783   ab->Be             = Be;
2784   ab->Ce             = Ce;
2785   ab->destroy        = C->ops->destroy;
2786   C->ops->destroy        = MatDestroy_MatMatMult_MPIDense_MPIDense;
2787   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2788   C->ops->productnumeric = MatProductNumeric_AB;
2789   PetscFunctionReturn(0);
2790 }
2791 #endif
2792 /* ----------------------------------------------- */
2793 #if defined(PETSC_HAVE_ELEMENTAL)
/* Install the symbolic/numeric callbacks for the C = A*B product
   (available only with Elemental support). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  C->ops->productnumeric  = MatProductNumeric_AB;
  PetscFunctionReturn(0);
}
2802 #endif
2803 
/* Symbolic callback for the C = A^T*B product: run the dense symbolic phase
   on the matrices stored in the product context, then install the numeric
   callbacks. */
static PetscErrorCode MatProductSymbolic_AtB_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  ierr = MatTransposeMatMultSymbolic_MPIDense_MPIDense(product->A,product->B,product->fill,C);CHKERRQ(ierr);
  C->ops->productnumeric          = MatProductNumeric_AtB;
  C->ops->transposematmultnumeric = MatTransposeMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2815 
/* Option handling for the C = A^T*B product: verify that A and B share the
   same row distribution (required by the algorithm), then install the
   symbolic callback. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->productsymbolic = MatProductSymbolic_AtB_MPIDense;
  PetscFunctionReturn(0);
}
2828 
/* Option handling for the C = A*B^T product: pick the algorithm
   ("allgatherv" by default, "cyclic" optionally) from the options database,
   then install the symbolic/numeric callbacks. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option: the option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    /* user chose an algorithm at runtime: record it on the product */
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  C->ops->productnumeric           = MatProductNumeric_ABt;
  PetscFunctionReturn(0);
}
2864 
/* Top-level product-option dispatch for MPIDENSE*MPIDENSE: route to the
   handler for the requested product type. AB is only available when PETSc
   was configured with Elemental. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIDense matrices",MatProductTypes[product->type]);
  }
  PetscFunctionReturn(0);
}
2887